From 652114c6419d8ee877b2ce593058ec0b83afd7d3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 Oct 2021 11:44:23 +0200 Subject: [PATCH 001/176] [affiliationPropagation] first try. preparetion --- .../main/java/eu/dnetlib/dhp/KeyValueSet.java | 26 ++ .../PrepareResultInstRepoAssociation.java | 5 +- .../ResultOrganizationSet.java | 26 -- ...arkResultToOrganizationFromIstRepoJob.java | 25 +- .../PrepareInfo.java | 118 +++++++ .../SparkResultToOrganizationFromSemRel.java | 29 ++ .../PrepareInfoJobTest.java | 323 ++++++++++++++++++ .../childparenttest1/relation | 7 + .../childparenttest2/relation | 7 + .../resultorganizationtest/relation | 7 + 10 files changed, 533 insertions(+), 40 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/KeyValueSet.java delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultOrganizationSet.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1/relation create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2/relation create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest/relation diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/KeyValueSet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/KeyValueSet.java new file mode 100644 index 000000000..57ab716b3 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/KeyValueSet.java @@ -0,0 +1,26 @@ + +package eu.dnetlib.dhp; + +import java.io.Serializable; +import java.util.ArrayList; + +public class KeyValueSet implements Serializable { + private String key; + private ArrayList valueSet; + + public String getKey() { + return key; + } + + public void setKey(String key) { + this.key = key; + } + + public ArrayList getValueSet() { + return valueSet; + } + + public void setValueSet(ArrayList valueSet) { + this.valueSet = valueSet; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index 0ef5ca181..dbd3bccfb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -10,6 +10,7 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import eu.dnetlib.dhp.KeyValueSet; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; @@ -124,7 +125,7 @@ public class PrepareResultInstRepoAssociation { private static void prepareAlreadyLinkedAssociation( SparkSession spark, String alreadyLinkedPath) { - String query = "Select source resultId, collect_set(target) organizationSet " + String query = "Select source key, collect_set(target) valueSet " + "from relation " + "where datainfo.deletedbyinference = false " + "and lower(relClass) = '" @@ -134,7 +135,7 @@ public class PrepareResultInstRepoAssociation { spark .sql(query) - .as(Encoders.bean(ResultOrganizationSet.class)) + .as(Encoders.bean(KeyValueSet.class)) // TODO retry to stick with datasets .toJavaRDD() .map(r -> OBJECT_MAPPER.writeValueAsString(r)) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultOrganizationSet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultOrganizationSet.java deleted file mode 100644 index 3bce14cdb..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultOrganizationSet.java +++ /dev/null @@ -1,26 +0,0 @@ - -package eu.dnetlib.dhp.resulttoorganizationfrominstrepo; - -import java.io.Serializable; -import java.util.ArrayList; - -public class ResultOrganizationSet implements Serializable { - private String resultId; - private ArrayList organizationSet; - - public String getResultId() { - return resultId; - } - - public void setResultId(String resultId) { - this.resultId = resultId; - } - - public ArrayList getOrganizationSet() { - return organizationSet; - } - - public void setOrganizationSet(ArrayList organizationSet) { - this.organizationSet = organizationSet; - } -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 63824f1a8..be0a4804a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -8,6 +8,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Optional; +import eu.dnetlib.dhp.KeyValueSet; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; @@ -28,7 +29,7 @@ public class SparkResultToOrganizationFromIstRepoJob { private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromIstRepoJob.class); - private static final String RESULT_ORGANIZATIONSET_QUERY = "SELECT id resultId, collect_set(organizationId) organizationSet " + private static final String RESULT_ORGANIZATIONSET_QUERY = "SELECT id key, collect_set(organizationId) valueSet " + "FROM ( SELECT id, organizationId " + "FROM rels " + "JOIN cfhb " @@ -107,14 +108,14 @@ public class SparkResultToOrganizationFromIstRepoJob { Dataset dsOrg = readPath(spark, datasourceorganization, DatasourceOrganization.class); - Dataset potentialUpdates = getPotentialRelations(spark, inputPath, clazz, dsOrg); + Dataset potentialUpdates = getPotentialRelations(spark, inputPath, clazz, dsOrg); - Dataset alreadyLinked = readPath(spark, alreadyLinkedPath, ResultOrganizationSet.class); + Dataset alreadyLinked = readPath(spark, alreadyLinkedPath, KeyValueSet.class); potentialUpdates .joinWith( alreadyLinked, - potentialUpdates.col("resultId").equalTo(alreadyLinked.col("resultId")), + potentialUpdates.col("key").equalTo(alreadyLinked.col("key")), "left_outer") .flatMap(createRelationFn(), Encoders.bean(Relation.class)) .write() @@ -123,18 +124,18 @@ public class SparkResultToOrganizationFromIstRepoJob { .json(outputPath); } - private static FlatMapFunction, Relation> createRelationFn() { + private static FlatMapFunction, Relation> createRelationFn() { return value -> { List newRelations = new ArrayList<>(); - ResultOrganizationSet potentialUpdate = value._1(); - Optional alreadyLinked = Optional.ofNullable(value._2()); - List organizations = potentialUpdate.getOrganizationSet(); + KeyValueSet potentialUpdate = value._1(); + Optional alreadyLinked = Optional.ofNullable(value._2()); + List organizations = potentialUpdate.getValueSet(); alreadyLinked .ifPresent( resOrg -> resOrg - .getOrganizationSet() + .getValueSet() .forEach(organizations::remove)); - String resultId = potentialUpdate.getResultId(); + String resultId = potentialUpdate.getKey(); organizations .forEach( orgId -> { @@ -165,7 +166,7 @@ public class SparkResultToOrganizationFromIstRepoJob { }; } - private static Dataset getPotentialRelations( + private static Dataset getPotentialRelations( SparkSession spark, String inputPath, Class resultClazz, @@ -179,7 +180,7 @@ public class SparkResultToOrganizationFromIstRepoJob { return spark .sql(RESULT_ORGANIZATIONSET_QUERY) - .as(Encoders.bean(ResultOrganizationSet.class)); + .as(Encoders.bean(KeyValueSet.class)); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java new file mode 100644 index 000000000..34817d9a9 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java @@ -0,0 +1,118 @@ +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Relation; + + +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; + +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Tuple2; + +import java.io.Serializable; +import java.util.*; + +import static eu.dnetlib.dhp.PropagationConstant.*; + +/** + * Searches for all the association between result and organization already existing in the graph + * Creates also the parenthood hierarchy from the organizations + */ + +public class PrepareInfo implements Serializable { + + //leggo le relazioni e seleziono quelle fra result ed organizzazioni + //raggruppo per result e salvo + //r => {o1, o2, o3} + + //leggo le relazioni fra le organizzazioni e creo la gerarchia delle parentele: + //hashMap key organizzazione -> value tutti i suoi padri + // o => {p1, p2} + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final Logger log = LoggerFactory.getLogger(PrepareInfo.class); + + //associa i figli con tutti i loro padri + private static final String relOrgQuery = + "SELECT target key, collect_set(source) as valueSet " + + "FROM relation " + + "WHERE lower(relclass) = '" + ModelConstants.IS_PARENT_OF.toLowerCase() + "' and datainfo.deletedbyinference = false " + + "GROUP BY target"; + + private static final String relResQuery = "SELECT source key, collect_set(target) as valueSet " + + "FROM relation " + + "WHERE lower(relclass) = '" + ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase() + "' and datainfo.deletedbyinference = false " + + "GROUP BY source"; + + + public static void prepareChildrenParent(SparkSession spark, String inputPath, String childParentOrganizationPath){ + Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); + relation.createOrReplaceTempView("relation"); + + spark + .sql(relOrgQuery) + .as(Encoders.bean(KeyValueSet.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(childParentOrganizationPath); + } + + public static void prepareResultOrgnization(SparkSession spark, String inputPath, String resultOrganizationPath){ + spark + .sql(relResQuery) + .as(Encoders.bean(KeyValueSet.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(resultOrganizationPath); + } + + private static void prepareInfo(SparkSession spark, String inputPath, String childParentOrganizationPath, + String currentIterationPath, String resultOrganizationPath){ + Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); + relation.createOrReplaceTempView("relation"); + + spark + .sql(relOrgQuery) + .as(Encoders.bean(KeyValueSet.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(childParentOrganizationPath); + + spark + .sql(relResQuery) + .as(Encoders.bean(KeyValueSet.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(resultOrganizationPath); + + Dataset children = spark.sql("Select distinct target from relation where relclass='IsParentOf' and datainfo.deletedbyinference = false").as(Encoders.STRING()); + + Dataset parent = spark.sql("Select distinct source from relation where relclass='IsParentOf' and datainfo.deletedbyinference = false").as(Encoders.STRING()); + + //prendo dalla join i risultati che hanno solo il lato sinistro: sono foglie + children.joinWith(parent, children.col("_1").equalTo(parent.col("_1")), "left") + .map((MapFunction, String>) value -> { + if (Optional.ofNullable(value._2()).isPresent()) { + return null; + } + + return value._1(); + }, Encoders.STRING()).filter(Objects::nonNull) + .write() + .mode(SaveMode.Overwrite) + .json(currentIterationPath); + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java new file mode 100644 index 000000000..bd1b7803f --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java @@ -0,0 +1,29 @@ +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +public class SparkResultToOrganizationFromSemRel { + + //per ogni figlio nel file delle organizzazioni + //devo fare una roba iterativa che legge info da un file e le cambia via via + //passo 1: creo l'informazione iniale: organizzazioni che non hanno figli con almeno un padre + //ogni organizzazione punta alla lista di padri + //eseguo la propagazione dall'organizzazione figlio all'organizzazione padre + //ricerco nel dataset delle relazioni se l'organizzazione a cui ho propagato ha, a sua volta, dei padri + //e propago anche a quelli e cosi' via fino a che arrivo ad organizzazione senza padre + + //organizationFile: + //f => {p1, p2, ..., pn} + //resultFile + //o => {r1, r2, ... rm} + + //supponiamo che f => {r1, r2} e che nessuno dei padri abbia gia' l'associazione con questi result + //quindi + //p1 => {r1, r2} + //p2 => {r1, r2} + //pn => {r1, r2} + + + //mi serve un file con tutta la gerarchia per organizzazioni + //un file con le organizzazioni foglia da joinare con l'altro file + //un file con le associazioni organizzazione -> result forse meglio result -> organization + +} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java new file mode 100644 index 000000000..1d3498d04 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java @@ -0,0 +1,323 @@ + +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob; +import eu.dnetlib.dhp.schema.oaf.Relation; +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.ForeachFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +public class PrepareInfoJobTest { + + private static final Logger log = LoggerFactory.getLogger(PrepareInfoJobTest.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(PrepareInfoJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(PrepareInfoJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareInfoJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + + @Test + public void childParentTest1() { + + PrepareInfo.prepareChildrenParent(spark, getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1") + .getPath(), workingDir.toString() + "/childParentOrg/"); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile( workingDir.toString() + "/childParentOrg/") + .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class)); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(KeyValueSet.class)); + + Assertions.assertEquals(6, verificationDs.count()); + + Assertions.assertEquals(1, verificationDs.filter("key = '20|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList().get(0).getValueSet().size()); + Assertions.assertEquals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f", + verificationDs.filter("key = '20|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList().get(0).getValueSet().get(0)); + + Assertions.assertEquals(2, verificationDs.filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList().get(0).getValueSet().size()); + Assertions.assertTrue(verificationDs.filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList().get(0).getValueSet().contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions.assertTrue(verificationDs.filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList().get(0).getValueSet().contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") + .collectAsList().get(0).getValueSet().size()); + Assertions.assertTrue(verificationDs.filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") + .collectAsList().get(0).getValueSet().contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList().get(0).getValueSet().size()); + Assertions.assertTrue(verificationDs.filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList().get(0).getValueSet().contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList().get(0).getValueSet().size()); + Assertions.assertTrue(verificationDs.filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList().get(0).getValueSet().contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList().get(0).getValueSet().size()); + Assertions.assertTrue(verificationDs.filter("key = '20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1'") + .collectAsList().get(0).getValueSet().contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + + } + + + + @Test + public void childParentTest2() { + + PrepareInfo.prepareChildrenParent(spark, getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2") + .getPath(), workingDir.toString() + "/childParentOrg/"); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile( workingDir.toString() + "/childParentOrg/") + .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class)); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(KeyValueSet.class)); + + Assertions.assertEquals(5, verificationDs.count()); + + Assertions.assertEquals(0, verificationDs.filter("key = '20|dedup_wf_001::2899e571609779168222fdeb59cb916d'").count()); + + Assertions.assertEquals(1, verificationDs.filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList().get(0).getValueSet().size()); + Assertions.assertEquals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f", + verificationDs.filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList().get(0).getValueSet().get(0)); + + Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") + .collectAsList().get(0).getValueSet().size()); + Assertions.assertTrue(verificationDs.filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") + .collectAsList().get(0).getValueSet().contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList().get(0).getValueSet().size()); + Assertions.assertTrue(verificationDs.filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList().get(0).getValueSet().contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList().get(0).getValueSet().size()); + Assertions.assertTrue(verificationDs.filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList().get(0).getValueSet().contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList().get(0).getValueSet().size()); + Assertions.assertTrue(verificationDs.filter("key = '20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1'") + .collectAsList().get(0).getValueSet().contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + + } + + + @Test + public void resultOrganizationTest1(){ + + } + + /** + * All the possible updates will produce a new relation. No relations are already linked in the grpha + * + * @throws Exception + */ + @Test + void UpdateTenTest() throws Exception { + final String potentialUpdatePath = getClass() + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates") + .getPath(); + final String alreadyLinkedPath = getClass() + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToProjectThroughSemRelJob + .main( + new String[] { + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-hive_metastore_uris", "", + "-saveGraph", "true", + "-outputPath", workingDir.toString() + "/relation", + "-potentialUpdatePath", potentialUpdatePath, + "-alreadyLinkedPath", alreadyLinkedPath, + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + // got 20 new relations because "produces" and "isProducedBy" are added + Assertions.assertEquals(10, tmp.count()); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + Assertions.assertEquals(5, verificationDs.filter("relClass = 'produces'").count()); + Assertions.assertEquals(5, verificationDs.filter("relClass = 'isProducedBy'").count()); + + Assertions + .assertEquals( + 5, + verificationDs + .filter( + (FilterFunction) r -> r.getSource().startsWith("50") + && r.getTarget().startsWith("40") + && r.getRelClass().equals("isProducedBy")) + .count()); + Assertions + .assertEquals( + 5, + verificationDs + .filter( + (FilterFunction) r -> r.getSource().startsWith("40") + && r.getTarget().startsWith("50") + && r.getRelClass().equals("produces")) + .count()); + + verificationDs.createOrReplaceTempView("temporary"); + + Assertions + .assertEquals( + 10, + spark + .sql( + "Select * from temporary where datainfo.inferenceprovenance = 'propagation'") + .count()); + } + + /** + * One of the relations in the possible updates is already linked to the project in the graph. All the others are + * not. There will be 9 new associations leading to 18 new relations + * + * @throws Exception + */ + @Test + void UpdateMixTest() throws Exception { + final String potentialUpdatepath = getClass() + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates") + .getPath(); + final String alreadyLinkedPath = getClass() + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToProjectThroughSemRelJob + .main( + new String[] { + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-hive_metastore_uris", "", + "-saveGraph", "true", + "-outputPath", workingDir.toString() + "/relation", + "-potentialUpdatePath", potentialUpdatepath, + "-alreadyLinkedPath", alreadyLinkedPath, + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + // JavaRDD tmp = sc.textFile("/tmp/relation") + // .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + // got 20 new relations because "produces" and "isProducedBy" are added + Assertions.assertEquals(8, tmp.count()); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + Assertions.assertEquals(4, verificationDs.filter("relClass = 'produces'").count()); + Assertions.assertEquals(4, verificationDs.filter("relClass = 'isProducedBy'").count()); + + Assertions + .assertEquals( + 4, + verificationDs + .filter( + (FilterFunction) r -> r.getSource().startsWith("50") + && r.getTarget().startsWith("40") + && r.getRelClass().equals("isProducedBy")) + .count()); + Assertions + .assertEquals( + 4, + verificationDs + .filter( + (FilterFunction) r -> r.getSource().startsWith("40") + && r.getTarget().startsWith("50") + && r.getRelClass().equals("produces")) + .count()); + + verificationDs.createOrReplaceTempView("temporary"); + + Assertions + .assertEquals( + 8, + spark + .sql( + "Select * from temporary where datainfo.inferenceprovenance = 'propagation'") + .count()); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1/relation new file mode 100644 index 000000000..c63a2e0ac --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1/relation @@ -0,0 +1,7 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2/relation new file mode 100644 index 000000000..54589de32 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2/relation @@ -0,0 +1,7 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest/relation new file mode 100644 index 000000000..5aeabb71b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest/relation @@ -0,0 +1,7 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} \ No newline at end of file From d0ef7d91c519e3ed6f12f82dc94aac46dd211812 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 26 Oct 2021 17:34:11 +0200 Subject: [PATCH 002/176] adding test resource --- .../resulttoorganizationfromsemrel/ExecStep.java | 4 ++++ .../dhp/resulttoorganizationfromsemrel/Leaves.java | 4 ++++ .../ExecStepTest.java | 4 ++++ .../execstep/childParentOrg/childparent | 6 ++++++ .../execstep/currentIteration/leaves | 3 +++ .../execstep/relation | 14 ++++++++++++++ .../execstep/resultOrganization/resultorganization | 5 +++++ 7 files changed, 40 insertions(+) create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStep.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/Leaves.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStepTest.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/childparent create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/leaves create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relation create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/resultorganization diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStep.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStep.java new file mode 100644 index 000000000..ed33a6297 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStep.java @@ -0,0 +1,4 @@ +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +public class ExecStep { +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/Leaves.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/Leaves.java new file mode 100644 index 000000000..602dc1aaa --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/Leaves.java @@ -0,0 +1,4 @@ +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +public class Leaves { +} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStepTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStepTest.java new file mode 100644 index 000000000..cd55a0688 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStepTest.java @@ -0,0 +1,4 @@ +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +public class ExecStepTest { +} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/childparent b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/childparent new file mode 100644 index 000000000..7d9ea588b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/childparent @@ -0,0 +1,6 @@ +{"key":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1","valueSet":["20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"]} +{"key":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d","valueSet":["20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","20|dedup_wf_001::2899e571609779168222fdeb59cb916d"]} +{"key":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","valueSet":["20|openaire____::ec653e804967133b9436fdd30d3ff51d"]} +{"key":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0","valueSet":["20|dedup_wf_001::2899e571609779168222fdeb59cb916d"]} +{"key":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","valueSet":["20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"]} +{"key":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","valueSet":["20|doajarticles::03748bcb5d754c951efec9700e18a56d"]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/leaves b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/leaves new file mode 100644 index 000000000..3be9cae3b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/leaves @@ -0,0 +1,3 @@ +{"value":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} +{"value":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"value":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relation new file mode 100644 index 000000000..db7db8fdd --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relation @@ -0,0 +1,14 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/resultorganization b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/resultorganization new file mode 100644 index 000000000..b4e227227 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/resultorganization @@ -0,0 +1,5 @@ +{"key":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","valueSet":["20|pippo_wf_001::2899e571609779168222fdeb59cb916d","20|dedup_wf_001::2899e571609779168222fdeb59cb916d"]} +{"key":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","valueSet":["20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"]} +{"key":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","valueSet":["20|pippo_wf_001::2899e571609779168222fdeb59cb916d","20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"]} +{"key":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","valueSet":["20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"]} +{"key":"50|doajarticles::03748bcb5d754c951efec9700e18a56d","valueSet":["20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"]} \ No newline at end of file From 09f36cffb8712b2e6dc63cf9523ab240bb4d9348 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 29 Oct 2021 11:20:03 +0200 Subject: [PATCH 003/176] [Enrichment: Propagation through parent-child relationships] First implementation, testing, and wf for propagation of result to organization through semantic relation --- .../dhp-enrichment/null/leaves/._SUCCESS.crc | Bin 0 -> 8 bytes ...38-4761-a051-bb601aecc3f0-c000.json.gz.crc | Bin 0 -> 12 bytes .../dhp-enrichment/null/leaves/_SUCCESS | 0 ...9-ad38-4761-a051-bb601aecc3f0-c000.json.gz | Bin 0 -> 20 bytes .../null/newRelation/._SUCCESS.crc | Bin 0 -> 8 bytes ...4a-4909-8047-ea2fb018bd87-c000.json.gz.crc | Bin 0 -> 12 bytes ...95-44a3-972a-a16a4be763dc-c000.json.gz.crc | Bin 0 -> 12 bytes ...11-4513-a719-beee952de6ee-c000.json.gz.crc | Bin 0 -> 12 bytes ...90-4b46-a1b2-71d347c94003-c000.json.gz.crc | Bin 0 -> 12 bytes ...b1-48ae-951b-6febe9db8857-c000.json.gz.crc | Bin 0 -> 12 bytes ...3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc | Bin 0 -> 12 bytes ...12-4f0b-9f3b-f29a7a51b646-c000.json.gz.crc | Bin 0 -> 12 bytes ...d9-46e5-891f-958f2f029ac5-c000.json.gz.crc | Bin 0 -> 12 bytes ...50-47d3-90a8-5854c6c07c5e-c000.json.gz.crc | Bin 0 -> 12 bytes ...cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc | Bin 0 -> 12 bytes ...48-4164-aba1-1e45fbadda99-c000.json.gz.crc | Bin 0 -> 12 bytes ...5e-415b-9aff-34ddc7257b59-c000.json.gz.crc | Bin 0 -> 12 bytes ...88-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc | Bin 0 -> 12 bytes ...e3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc | Bin 0 -> 12 bytes ...8b-4774-8782-b9e7c25f989f-c000.json.gz.crc | Bin 0 -> 12 bytes ...d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc | Bin 0 -> 12 bytes ...5f-4d29-b277-ac25b25142cb-c000.json.gz.crc | Bin 0 -> 12 bytes ...9a-4ecc-aafa-ceb787179fe5-c000.json.gz.crc | Bin 0 -> 12 bytes ...f0-419a-bef1-d30e052ca15a-c000.json.gz.crc | Bin 0 -> 12 bytes ...16-4607-a98d-ae6ad338fccb-c000.json.gz.crc | Bin 0 -> 12 bytes ...83-4d37-97e4-124125abfca5-c000.json.gz.crc | Bin 0 -> 12 bytes ...5e-4d14-905a-1c0027db70f8-c000.json.gz.crc | Bin 0 -> 12 bytes ...74-4849-a47d-1fc7646acedc-c000.json.gz.crc | Bin 0 -> 12 bytes ...d5-4381-a03b-9f2ed0830707-c000.json.gz.crc | Bin 0 -> 12 bytes ...58-44b8-962c-4350e0e108a7-c000.json.gz.crc | Bin 0 -> 12 bytes ...30-4638-984b-becff6aa4587-c000.json.gz.crc | Bin 0 -> 12 bytes ...a4-46a7-a736-feb6d779b206-c000.json.gz.crc | Bin 0 -> 12 bytes ...8a-4019-a3a4-55ca75b6d857-c000.json.gz.crc | Bin 0 -> 12 bytes ...02-43f9-b2e8-65c57158af28-c000.json.gz.crc | Bin 0 -> 12 bytes ...44-41e4-963b-5f602891de01-c000.json.gz.crc | Bin 0 -> 12 bytes ...13-43b5-a5f9-3c62e202eef1-c000.json.gz.crc | Bin 0 -> 12 bytes ...f9-4998-95af-290f0f7a57d6-c000.json.gz.crc | Bin 0 -> 12 bytes ...4a-4909-8047-ea2fb018bd87-c000.json.gz.crc | Bin 0 -> 12 bytes ...90-4b46-a1b2-71d347c94003-c000.json.gz.crc | Bin 0 -> 12 bytes ...3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc | Bin 0 -> 12 bytes ...d9-46e5-891f-958f2f029ac5-c000.json.gz.crc | Bin 0 -> 12 bytes ...5e-415b-9aff-34ddc7257b59-c000.json.gz.crc | Bin 0 -> 12 bytes ...88-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc | Bin 0 -> 12 bytes ...e3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc | Bin 0 -> 12 bytes ...8b-4774-8782-b9e7c25f989f-c000.json.gz.crc | Bin 0 -> 12 bytes ...f0-419a-bef1-d30e052ca15a-c000.json.gz.crc | Bin 0 -> 12 bytes ...16-4607-a98d-ae6ad338fccb-c000.json.gz.crc | Bin 0 -> 12 bytes ...74-4849-a47d-1fc7646acedc-c000.json.gz.crc | Bin 0 -> 12 bytes ...30-4638-984b-becff6aa4587-c000.json.gz.crc | Bin 0 -> 12 bytes ...a4-46a7-a736-feb6d779b206-c000.json.gz.crc | Bin 0 -> 12 bytes ...44-41e4-963b-5f602891de01-c000.json.gz.crc | Bin 0 -> 12 bytes ...13-43b5-a5f9-3c62e202eef1-c000.json.gz.crc | Bin 0 -> 12 bytes ...f9-4998-95af-290f0f7a57d6-c000.json.gz.crc | Bin 0 -> 12 bytes ...95-44a3-972a-a16a4be763dc-c000.json.gz.crc | Bin 0 -> 12 bytes ...cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc | Bin 0 -> 12 bytes ...d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc | Bin 0 -> 12 bytes ...5f-4d29-b277-ac25b25142cb-c000.json.gz.crc | Bin 0 -> 12 bytes ...5e-4d14-905a-1c0027db70f8-c000.json.gz.crc | Bin 0 -> 12 bytes ...58-44b8-962c-4350e0e108a7-c000.json.gz.crc | Bin 0 -> 12 bytes ...8a-4019-a3a4-55ca75b6d857-c000.json.gz.crc | Bin 0 -> 12 bytes ...02-43f9-b2e8-65c57158af28-c000.json.gz.crc | Bin 0 -> 12 bytes ...95-44a3-972a-a16a4be763dc-c000.json.gz.crc | Bin 0 -> 12 bytes ...cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc | Bin 0 -> 12 bytes ...d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc | Bin 0 -> 12 bytes ...5f-4d29-b277-ac25b25142cb-c000.json.gz.crc | Bin 0 -> 12 bytes ...5e-4d14-905a-1c0027db70f8-c000.json.gz.crc | Bin 0 -> 12 bytes ...58-44b8-962c-4350e0e108a7-c000.json.gz.crc | Bin 0 -> 12 bytes ...8a-4019-a3a4-55ca75b6d857-c000.json.gz.crc | Bin 0 -> 12 bytes ...02-43f9-b2e8-65c57158af28-c000.json.gz.crc | Bin 0 -> 12 bytes ...4a-4909-8047-ea2fb018bd87-c000.json.gz.crc | Bin 0 -> 12 bytes ...95-44a3-972a-a16a4be763dc-c000.json.gz.crc | Bin 0 -> 12 bytes ...90-4b46-a1b2-71d347c94003-c000.json.gz.crc | Bin 0 -> 12 bytes ...3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc | Bin 0 -> 12 bytes ...cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc | Bin 0 -> 12 bytes ...e3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc | Bin 0 -> 12 bytes ...d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc | Bin 0 -> 12 bytes ...5f-4d29-b277-ac25b25142cb-c000.json.gz.crc | Bin 0 -> 12 bytes ...5e-4d14-905a-1c0027db70f8-c000.json.gz.crc | Bin 0 -> 12 bytes ...74-4849-a47d-1fc7646acedc-c000.json.gz.crc | Bin 0 -> 12 bytes ...58-44b8-962c-4350e0e108a7-c000.json.gz.crc | Bin 0 -> 12 bytes ...30-4638-984b-becff6aa4587-c000.json.gz.crc | Bin 0 -> 12 bytes ...8a-4019-a3a4-55ca75b6d857-c000.json.gz.crc | Bin 0 -> 12 bytes ...02-43f9-b2e8-65c57158af28-c000.json.gz.crc | Bin 0 -> 12 bytes ...13-43b5-a5f9-3c62e202eef1-c000.json.gz.crc | Bin 0 -> 12 bytes ...f9-4998-95af-290f0f7a57d6-c000.json.gz.crc | Bin 0 -> 12 bytes ...4a-4909-8047-ea2fb018bd87-c000.json.gz.crc | Bin 0 -> 12 bytes ...90-4b46-a1b2-71d347c94003-c000.json.gz.crc | Bin 0 -> 12 bytes ...3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc | Bin 0 -> 12 bytes ...d9-46e5-891f-958f2f029ac5-c000.json.gz.crc | Bin 0 -> 12 bytes ...5e-415b-9aff-34ddc7257b59-c000.json.gz.crc | Bin 0 -> 12 bytes ...88-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc | Bin 0 -> 12 bytes ...e3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc | Bin 0 -> 12 bytes ...8b-4774-8782-b9e7c25f989f-c000.json.gz.crc | Bin 0 -> 12 bytes ...f0-419a-bef1-d30e052ca15a-c000.json.gz.crc | Bin 0 -> 12 bytes ...16-4607-a98d-ae6ad338fccb-c000.json.gz.crc | Bin 0 -> 12 bytes ...74-4849-a47d-1fc7646acedc-c000.json.gz.crc | Bin 0 -> 12 bytes ...30-4638-984b-becff6aa4587-c000.json.gz.crc | Bin 0 -> 12 bytes ...a4-46a7-a736-feb6d779b206-c000.json.gz.crc | Bin 0 -> 12 bytes ...44-41e4-963b-5f602891de01-c000.json.gz.crc | Bin 0 -> 12 bytes ...13-43b5-a5f9-3c62e202eef1-c000.json.gz.crc | Bin 0 -> 12 bytes ...f9-4998-95af-290f0f7a57d6-c000.json.gz.crc | Bin 0 -> 12 bytes ...4a-4909-8047-ea2fb018bd87-c000.json.gz.crc | Bin 0 -> 12 bytes ...95-44a3-972a-a16a4be763dc-c000.json.gz.crc | Bin 0 -> 12 bytes ...90-4b46-a1b2-71d347c94003-c000.json.gz.crc | Bin 0 -> 12 bytes ...3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc | Bin 0 -> 12 bytes ...cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc | Bin 0 -> 12 bytes ...e3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc | Bin 0 -> 12 bytes ...d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc | Bin 0 -> 12 bytes ...5f-4d29-b277-ac25b25142cb-c000.json.gz.crc | Bin 0 -> 12 bytes ...5e-4d14-905a-1c0027db70f8-c000.json.gz.crc | Bin 0 -> 12 bytes ...74-4849-a47d-1fc7646acedc-c000.json.gz.crc | Bin 0 -> 12 bytes ...58-44b8-962c-4350e0e108a7-c000.json.gz.crc | Bin 0 -> 12 bytes ...30-4638-984b-becff6aa4587-c000.json.gz.crc | Bin 0 -> 12 bytes ...8a-4019-a3a4-55ca75b6d857-c000.json.gz.crc | Bin 0 -> 12 bytes ...02-43f9-b2e8-65c57158af28-c000.json.gz.crc | Bin 0 -> 12 bytes ...13-43b5-a5f9-3c62e202eef1-c000.json.gz.crc | Bin 0 -> 12 bytes ...f9-4998-95af-290f0f7a57d6-c000.json.gz.crc | Bin 0 -> 12 bytes ...4a-4909-8047-ea2fb018bd87-c000.json.gz.crc | Bin 0 -> 12 bytes ...90-4b46-a1b2-71d347c94003-c000.json.gz.crc | Bin 0 -> 12 bytes ...3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc | Bin 0 -> 12 bytes ...d9-46e5-891f-958f2f029ac5-c000.json.gz.crc | Bin 0 -> 12 bytes ...5e-415b-9aff-34ddc7257b59-c000.json.gz.crc | Bin 0 -> 12 bytes ...88-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc | Bin 0 -> 12 bytes ...e3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc | Bin 0 -> 12 bytes ...8b-4774-8782-b9e7c25f989f-c000.json.gz.crc | Bin 0 -> 12 bytes ...f0-419a-bef1-d30e052ca15a-c000.json.gz.crc | Bin 0 -> 12 bytes ...16-4607-a98d-ae6ad338fccb-c000.json.gz.crc | Bin 0 -> 12 bytes ...74-4849-a47d-1fc7646acedc-c000.json.gz.crc | Bin 0 -> 12 bytes ...30-4638-984b-becff6aa4587-c000.json.gz.crc | Bin 0 -> 12 bytes ...a4-46a7-a736-feb6d779b206-c000.json.gz.crc | Bin 0 -> 12 bytes ...44-41e4-963b-5f602891de01-c000.json.gz.crc | Bin 0 -> 12 bytes ...13-43b5-a5f9-3c62e202eef1-c000.json.gz.crc | Bin 0 -> 12 bytes ...f9-4998-95af-290f0f7a57d6-c000.json.gz.crc | Bin 0 -> 12 bytes ...4a-4909-8047-ea2fb018bd87-c000.json.gz.crc | Bin 0 -> 12 bytes ...90-4b46-a1b2-71d347c94003-c000.json.gz.crc | Bin 0 -> 12 bytes ...3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc | Bin 0 -> 12 bytes ...e3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc | Bin 0 -> 12 bytes ...74-4849-a47d-1fc7646acedc-c000.json.gz.crc | Bin 0 -> 12 bytes ...30-4638-984b-becff6aa4587-c000.json.gz.crc | Bin 0 -> 12 bytes ...13-43b5-a5f9-3c62e202eef1-c000.json.gz.crc | Bin 0 -> 12 bytes ...f9-4998-95af-290f0f7a57d6-c000.json.gz.crc | Bin 0 -> 12 bytes .../dhp-enrichment/null/newRelation/_SUCCESS | 0 ...e-f14a-4909-8047-ea2fb018bd87-c000.json.gz | Bin 0 -> 20 bytes ...3-f095-44a3-972a-a16a4be763dc-c000.json.gz | Bin 0 -> 20 bytes ...4-8e11-4513-a719-beee952de6ee-c000.json.gz | Bin 0 -> 20 bytes ...9-b290-4b46-a1b2-71d347c94003-c000.json.gz | Bin 0 -> 20 bytes ...2-49b1-48ae-951b-6febe9db8857-c000.json.gz | Bin 0 -> 20 bytes ...c-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz | Bin 0 -> 20 bytes ...f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz | Bin 0 -> 20 bytes ...4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz | Bin 0 -> 20 bytes ...a-e550-47d3-90a8-5854c6c07c5e-c000.json.gz | Bin 0 -> 20 bytes ...a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz | Bin 0 -> 20 bytes ...7-c748-4164-aba1-1e45fbadda99-c000.json.gz | Bin 0 -> 20 bytes ...a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz | Bin 0 -> 20 bytes ...e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz | Bin 0 -> 20 bytes ...2-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz | Bin 0 -> 20 bytes ...5-f58b-4774-8782-b9e7c25f989f-c000.json.gz | Bin 0 -> 20 bytes ...6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz | Bin 0 -> 20 bytes ...e-b25f-4d29-b277-ac25b25142cb-c000.json.gz | Bin 0 -> 20 bytes ...7-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz | Bin 0 -> 20 bytes ...3-41f0-419a-bef1-d30e052ca15a-c000.json.gz | Bin 0 -> 20 bytes ...a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz | Bin 0 -> 20 bytes ...e-6383-4d37-97e4-124125abfca5-c000.json.gz | Bin 0 -> 20 bytes ...b-465e-4d14-905a-1c0027db70f8-c000.json.gz | Bin 0 -> 20 bytes ...8-cc74-4849-a47d-1fc7646acedc-c000.json.gz | Bin 0 -> 20 bytes ...a-16d5-4381-a03b-9f2ed0830707-c000.json.gz | Bin 0 -> 20 bytes ...2-a058-44b8-962c-4350e0e108a7-c000.json.gz | Bin 0 -> 20 bytes ...2-f830-4638-984b-becff6aa4587-c000.json.gz | Bin 0 -> 20 bytes ...0-30a4-46a7-a736-feb6d779b206-c000.json.gz | Bin 0 -> 20 bytes ...c-758a-4019-a3a4-55ca75b6d857-c000.json.gz | Bin 0 -> 20 bytes ...0-c002-43f9-b2e8-65c57158af28-c000.json.gz | Bin 0 -> 20 bytes ...2-3a44-41e4-963b-5f602891de01-c000.json.gz | Bin 0 -> 20 bytes ...b-1513-43b5-a5f9-3c62e202eef1-c000.json.gz | Bin 0 -> 20 bytes ...a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz | Bin 0 -> 20 bytes ...e-f14a-4909-8047-ea2fb018bd87-c000.json.gz | Bin 0 -> 356 bytes ...9-b290-4b46-a1b2-71d347c94003-c000.json.gz | Bin 0 -> 356 bytes ...c-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz | Bin 0 -> 356 bytes ...4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz | Bin 0 -> 356 bytes ...a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz | Bin 0 -> 356 bytes ...e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz | Bin 0 -> 356 bytes ...2-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz | Bin 0 -> 356 bytes ...5-f58b-4774-8782-b9e7c25f989f-c000.json.gz | Bin 0 -> 356 bytes ...3-41f0-419a-bef1-d30e052ca15a-c000.json.gz | Bin 0 -> 356 bytes ...a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz | Bin 0 -> 356 bytes ...8-cc74-4849-a47d-1fc7646acedc-c000.json.gz | Bin 0 -> 356 bytes ...2-f830-4638-984b-becff6aa4587-c000.json.gz | Bin 0 -> 356 bytes ...0-30a4-46a7-a736-feb6d779b206-c000.json.gz | Bin 0 -> 356 bytes ...2-3a44-41e4-963b-5f602891de01-c000.json.gz | Bin 0 -> 356 bytes ...b-1513-43b5-a5f9-3c62e202eef1-c000.json.gz | Bin 0 -> 356 bytes ...a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz | Bin 0 -> 356 bytes ...3-f095-44a3-972a-a16a4be763dc-c000.json.gz | Bin 0 -> 323 bytes ...a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz | Bin 0 -> 323 bytes ...6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz | Bin 0 -> 323 bytes ...e-b25f-4d29-b277-ac25b25142cb-c000.json.gz | Bin 0 -> 323 bytes ...b-465e-4d14-905a-1c0027db70f8-c000.json.gz | Bin 0 -> 323 bytes ...2-a058-44b8-962c-4350e0e108a7-c000.json.gz | Bin 0 -> 323 bytes ...c-758a-4019-a3a4-55ca75b6d857-c000.json.gz | Bin 0 -> 323 bytes ...0-c002-43f9-b2e8-65c57158af28-c000.json.gz | Bin 0 -> 323 bytes ...3-f095-44a3-972a-a16a4be763dc-c000.json.gz | Bin 0 -> 326 bytes ...a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz | Bin 0 -> 326 bytes ...6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz | Bin 0 -> 326 bytes ...e-b25f-4d29-b277-ac25b25142cb-c000.json.gz | Bin 0 -> 326 bytes ...b-465e-4d14-905a-1c0027db70f8-c000.json.gz | Bin 0 -> 326 bytes ...2-a058-44b8-962c-4350e0e108a7-c000.json.gz | Bin 0 -> 326 bytes ...c-758a-4019-a3a4-55ca75b6d857-c000.json.gz | Bin 0 -> 326 bytes ...0-c002-43f9-b2e8-65c57158af28-c000.json.gz | Bin 0 -> 326 bytes ...e-f14a-4909-8047-ea2fb018bd87-c000.json.gz | Bin 0 -> 357 bytes ...3-f095-44a3-972a-a16a4be763dc-c000.json.gz | Bin 0 -> 357 bytes ...9-b290-4b46-a1b2-71d347c94003-c000.json.gz | Bin 0 -> 357 bytes ...c-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz | Bin 0 -> 357 bytes ...a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz | Bin 0 -> 357 bytes ...2-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz | Bin 0 -> 357 bytes ...6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz | Bin 0 -> 357 bytes ...e-b25f-4d29-b277-ac25b25142cb-c000.json.gz | Bin 0 -> 357 bytes ...b-465e-4d14-905a-1c0027db70f8-c000.json.gz | Bin 0 -> 357 bytes ...8-cc74-4849-a47d-1fc7646acedc-c000.json.gz | Bin 0 -> 357 bytes ...2-a058-44b8-962c-4350e0e108a7-c000.json.gz | Bin 0 -> 357 bytes ...2-f830-4638-984b-becff6aa4587-c000.json.gz | Bin 0 -> 357 bytes ...c-758a-4019-a3a4-55ca75b6d857-c000.json.gz | Bin 0 -> 357 bytes ...0-c002-43f9-b2e8-65c57158af28-c000.json.gz | Bin 0 -> 357 bytes ...b-1513-43b5-a5f9-3c62e202eef1-c000.json.gz | Bin 0 -> 357 bytes ...a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz | Bin 0 -> 357 bytes ...e-f14a-4909-8047-ea2fb018bd87-c000.json.gz | Bin 0 -> 398 bytes ...9-b290-4b46-a1b2-71d347c94003-c000.json.gz | Bin 0 -> 398 bytes ...c-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz | Bin 0 -> 398 bytes ...4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz | Bin 0 -> 324 bytes ...a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz | Bin 0 -> 324 bytes ...e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz | Bin 0 -> 324 bytes ...2-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz | Bin 0 -> 398 bytes ...5-f58b-4774-8782-b9e7c25f989f-c000.json.gz | Bin 0 -> 324 bytes ...3-41f0-419a-bef1-d30e052ca15a-c000.json.gz | Bin 0 -> 324 bytes ...a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz | Bin 0 -> 324 bytes ...8-cc74-4849-a47d-1fc7646acedc-c000.json.gz | Bin 0 -> 398 bytes ...2-f830-4638-984b-becff6aa4587-c000.json.gz | Bin 0 -> 398 bytes ...0-30a4-46a7-a736-feb6d779b206-c000.json.gz | Bin 0 -> 324 bytes ...2-3a44-41e4-963b-5f602891de01-c000.json.gz | Bin 0 -> 324 bytes ...b-1513-43b5-a5f9-3c62e202eef1-c000.json.gz | Bin 0 -> 398 bytes ...a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz | Bin 0 -> 398 bytes ...e-f14a-4909-8047-ea2fb018bd87-c000.json.gz | Bin 0 -> 323 bytes ...3-f095-44a3-972a-a16a4be763dc-c000.json.gz | Bin 0 -> 323 bytes ...9-b290-4b46-a1b2-71d347c94003-c000.json.gz | Bin 0 -> 323 bytes ...c-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz | Bin 0 -> 323 bytes ...a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz | Bin 0 -> 323 bytes ...2-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz | Bin 0 -> 323 bytes ...6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz | Bin 0 -> 323 bytes ...e-b25f-4d29-b277-ac25b25142cb-c000.json.gz | Bin 0 -> 323 bytes ...b-465e-4d14-905a-1c0027db70f8-c000.json.gz | Bin 0 -> 323 bytes ...8-cc74-4849-a47d-1fc7646acedc-c000.json.gz | Bin 0 -> 323 bytes ...2-a058-44b8-962c-4350e0e108a7-c000.json.gz | Bin 0 -> 323 bytes ...2-f830-4638-984b-becff6aa4587-c000.json.gz | Bin 0 -> 323 bytes ...c-758a-4019-a3a4-55ca75b6d857-c000.json.gz | Bin 0 -> 323 bytes ...0-c002-43f9-b2e8-65c57158af28-c000.json.gz | Bin 0 -> 323 bytes ...b-1513-43b5-a5f9-3c62e202eef1-c000.json.gz | Bin 0 -> 323 bytes ...a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz | Bin 0 -> 323 bytes ...e-f14a-4909-8047-ea2fb018bd87-c000.json.gz | Bin 0 -> 348 bytes ...9-b290-4b46-a1b2-71d347c94003-c000.json.gz | Bin 0 -> 348 bytes ...c-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz | Bin 0 -> 348 bytes ...4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz | Bin 0 -> 348 bytes ...a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz | Bin 0 -> 348 bytes ...e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz | Bin 0 -> 348 bytes ...2-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz | Bin 0 -> 348 bytes ...5-f58b-4774-8782-b9e7c25f989f-c000.json.gz | Bin 0 -> 348 bytes ...3-41f0-419a-bef1-d30e052ca15a-c000.json.gz | Bin 0 -> 348 bytes ...a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz | Bin 0 -> 348 bytes ...8-cc74-4849-a47d-1fc7646acedc-c000.json.gz | Bin 0 -> 348 bytes ...2-f830-4638-984b-becff6aa4587-c000.json.gz | Bin 0 -> 348 bytes ...0-30a4-46a7-a736-feb6d779b206-c000.json.gz | Bin 0 -> 348 bytes ...2-3a44-41e4-963b-5f602891de01-c000.json.gz | Bin 0 -> 348 bytes ...b-1513-43b5-a5f9-3c62e202eef1-c000.json.gz | Bin 0 -> 348 bytes ...a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz | Bin 0 -> 348 bytes ...e-f14a-4909-8047-ea2fb018bd87-c000.json.gz | Bin 0 -> 324 bytes ...9-b290-4b46-a1b2-71d347c94003-c000.json.gz | Bin 0 -> 324 bytes ...c-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz | Bin 0 -> 324 bytes ...2-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz | Bin 0 -> 324 bytes ...8-cc74-4849-a47d-1fc7646acedc-c000.json.gz | Bin 0 -> 324 bytes ...2-f830-4638-984b-becff6aa4587-c000.json.gz | Bin 0 -> 324 bytes ...b-1513-43b5-a5f9-3c62e202eef1-c000.json.gz | Bin 0 -> 324 bytes ...a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz | Bin 0 -> 324 bytes .../dhp-enrichment/null/resOrg/._SUCCESS.crc | Bin 0 -> 8 bytes ...63-44f1-ad60-c02307951b8e-c000.json.gz.crc | Bin 0 -> 12 bytes ...63-44f1-ad60-c02307951b8e-c000.json.gz.crc | Bin 0 -> 12 bytes ...63-44f1-ad60-c02307951b8e-c000.json.gz.crc | Bin 0 -> 12 bytes ...63-44f1-ad60-c02307951b8e-c000.json.gz.crc | Bin 0 -> 12 bytes ...63-44f1-ad60-c02307951b8e-c000.json.gz.crc | Bin 0 -> 12 bytes ...63-44f1-ad60-c02307951b8e-c000.json.gz.crc | Bin 0 -> 12 bytes .../dhp-enrichment/null/resOrg/_SUCCESS | 0 ...5-a763-44f1-ad60-c02307951b8e-c000.json.gz | Bin 0 -> 20 bytes ...5-a763-44f1-ad60-c02307951b8e-c000.json.gz | Bin 0 -> 169 bytes ...5-a763-44f1-ad60-c02307951b8e-c000.json.gz | Bin 0 -> 137 bytes ...5-a763-44f1-ad60-c02307951b8e-c000.json.gz | Bin 0 -> 184 bytes ...5-a763-44f1-ad60-c02307951b8e-c000.json.gz | Bin 0 -> 126 bytes ...5-a763-44f1-ad60-c02307951b8e-c000.json.gz | Bin 0 -> 164 bytes dhp-workflows/dhp-enrichment/pom.xml | 6 + .../eu/dnetlib/dhp/PropagationConstant.java | 44 ++ .../PrepareResultInstRepoAssociation.java | 2 +- ...arkResultToOrganizationFromIstRepoJob.java | 35 +- .../ExecStep.java | 4 - .../Leaves.java | 14 +- .../PrepareInfo.java | 210 +++--- .../PropagationCounter.java | 77 +++ .../SparkResultToOrganizationFromSemRel.java | 239 ++++++- .../StepActions.java | 213 ++++++ .../input_preparation_parameter.json | 38 ++ .../input_propagation_parameter.json | 50 ++ .../oozie_app/config-default.xml | 58 ++ .../oozie_app/workflow.xml | 192 ++++++ .../ExecStepTest.java | 4 - .../PrepareInfoJobTest.java | 635 ++++++++++++------ .../SparkJobTest.java | 323 +++++++++ .../StepActionsTest.java | 411 ++++++++++++ .../execstep/relsforiteration1/relation | 4 + 311 files changed, 2214 insertions(+), 345 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/null/leaves/._SUCCESS.crc create mode 100644 dhp-workflows/dhp-enrichment/null/leaves/.part-00000-366e33b9-ad38-4761-a051-bb601aecc3f0-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/leaves/_SUCCESS create mode 100644 dhp-workflows/dhp-enrichment/null/leaves/part-00000-366e33b9-ad38-4761-a051-bb601aecc3f0-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/._SUCCESS.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-08f52c24-8e11-4513-a719-beee952de6ee-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-2dc8bfd2-49b1-48ae-951b-6febe9db8857-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-3611fa1f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-3c4369da-e550-47d3-90a8-5854c6c07c5e-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-4332f647-c748-4164-aba1-1e45fbadda99-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-810d7e27-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a127934e-6383-4d37-97e4-124125abfca5-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-beb4081a-16d5-4381-a03b-9f2ed0830707-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/_SUCCESS create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-08f52c24-8e11-4513-a719-beee952de6ee-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-2dc8bfd2-49b1-48ae-951b-6febe9db8857-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-3611fa1f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-3c4369da-e550-47d3-90a8-5854c6c07c5e-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-4332f647-c748-4164-aba1-1e45fbadda99-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-810d7e27-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a127934e-6383-4d37-97e4-124125abfca5-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-beb4081a-16d5-4381-a03b-9f2ed0830707-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/._SUCCESS.crc create mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/.part-00000-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/.part-00047-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/.part-00055-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/.part-00096-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/.part-00117-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/.part-00140-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc create mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/_SUCCESS create mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/part-00000-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/part-00047-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/part-00055-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/part-00096-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/part-00117-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz create mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/part-00140-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStep.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PropagationCounter.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStepTest.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkJobTest.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActionsTest.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/relation diff --git a/dhp-workflows/dhp-enrichment/null/leaves/._SUCCESS.crc b/dhp-workflows/dhp-enrichment/null/leaves/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/leaves/.part-00000-366e33b9-ad38-4761-a051-bb601aecc3f0-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/leaves/.part-00000-366e33b9-ad38-4761-a051-bb601aecc3f0-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/leaves/_SUCCESS b/dhp-workflows/dhp-enrichment/null/leaves/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-enrichment/null/leaves/part-00000-366e33b9-ad38-4761-a051-bb601aecc3f0-c000.json.gz b/dhp-workflows/dhp-enrichment/null/leaves/part-00000-366e33b9-ad38-4761-a051-bb601aecc3f0-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/._SUCCESS.crc b/dhp-workflows/dhp-enrichment/null/newRelation/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-08f52c24-8e11-4513-a719-beee952de6ee-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-08f52c24-8e11-4513-a719-beee952de6ee-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-2dc8bfd2-49b1-48ae-951b-6febe9db8857-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-2dc8bfd2-49b1-48ae-951b-6febe9db8857-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-3611fa1f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-3611fa1f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-3c4369da-e550-47d3-90a8-5854c6c07c5e-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-3c4369da-e550-47d3-90a8-5854c6c07c5e-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-4332f647-c748-4164-aba1-1e45fbadda99-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-4332f647-c748-4164-aba1-1e45fbadda99-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-810d7e27-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-810d7e27-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a127934e-6383-4d37-97e4-124125abfca5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a127934e-6383-4d37-97e4-124125abfca5-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-beb4081a-16d5-4381-a03b-9f2ed0830707-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-beb4081a-16d5-4381-a03b-9f2ed0830707-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..444ebf0e34af16d32e663373a50f0161b9d62839 GIT binary patch literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883 GIT binary patch literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..9690bbf5222c463c882b01ad99b0f5b07868ab7a GIT binary patch literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..9690bbf5222c463c882b01ad99b0f5b07868ab7a GIT binary patch literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..9690bbf5222c463c882b01ad99b0f5b07868ab7a GIT binary patch literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..9690bbf5222c463c882b01ad99b0f5b07868ab7a GIT binary patch literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..9690bbf5222c463c882b01ad99b0f5b07868ab7a GIT binary patch literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..9690bbf5222c463c882b01ad99b0f5b07868ab7a GIT binary patch literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..9690bbf5222c463c882b01ad99b0f5b07868ab7a GIT binary patch literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..9690bbf5222c463c882b01ad99b0f5b07868ab7a GIT binary patch literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..557a5a816fd1f6ace1abdac1bdaf029130766219 GIT binary patch literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..557a5a816fd1f6ace1abdac1bdaf029130766219 GIT binary patch literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..557a5a816fd1f6ace1abdac1bdaf029130766219 GIT binary patch literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..557a5a816fd1f6ace1abdac1bdaf029130766219 GIT binary patch literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..557a5a816fd1f6ace1abdac1bdaf029130766219 GIT binary patch literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..557a5a816fd1f6ace1abdac1bdaf029130766219 GIT binary patch literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..557a5a816fd1f6ace1abdac1bdaf029130766219 GIT binary patch literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..557a5a816fd1f6ace1abdac1bdaf029130766219 GIT binary patch literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..b3370486fb295f23b42e0034756726538c75d20c GIT binary patch literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..ba9dc02f200333afbab6d6d41e643ee3829765cd GIT binary patch literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..ba9dc02f200333afbab6d6d41e643ee3829765cd GIT binary patch literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..ba9dc02f200333afbab6d6d41e643ee3829765cd GIT binary patch literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..c0e5dc3160176fc74b43c74a92873202cf6f48e3 GIT binary patch literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..c0e5dc3160176fc74b43c74a92873202cf6f48e3 GIT binary patch literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..c0e5dc3160176fc74b43c74a92873202cf6f48e3 GIT binary patch literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..ba9dc02f200333afbab6d6d41e643ee3829765cd GIT binary patch literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..c0e5dc3160176fc74b43c74a92873202cf6f48e3 GIT binary patch literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..c0e5dc3160176fc74b43c74a92873202cf6f48e3 GIT binary patch literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..c0e5dc3160176fc74b43c74a92873202cf6f48e3 GIT binary patch literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..ba9dc02f200333afbab6d6d41e643ee3829765cd GIT binary patch literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..ba9dc02f200333afbab6d6d41e643ee3829765cd GIT binary patch literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..c0e5dc3160176fc74b43c74a92873202cf6f48e3 GIT binary patch literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..c0e5dc3160176fc74b43c74a92873202cf6f48e3 GIT binary patch literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..ba9dc02f200333afbab6d6d41e643ee3829765cd GIT binary patch literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..ba9dc02f200333afbab6d6d41e643ee3829765cd GIT binary patch literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f GIT binary patch literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ea578242fce12608d5ad0076d5368e5ab1e5ce3 GIT binary patch literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..fcf53cc925f084a3da1796db662c25862b516e87 GIT binary patch literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..fcf53cc925f084a3da1796db662c25862b516e87 GIT binary patch literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..fcf53cc925f084a3da1796db662c25862b516e87 GIT binary patch literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..fcf53cc925f084a3da1796db662c25862b516e87 GIT binary patch literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..fcf53cc925f084a3da1796db662c25862b516e87 GIT binary patch literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..fcf53cc925f084a3da1796db662c25862b516e87 GIT binary patch literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..fcf53cc925f084a3da1796db662c25862b516e87 GIT binary patch literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..fcf53cc925f084a3da1796db662c25862b516e87 GIT binary patch literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/_SUCCESS b/dhp-workflows/dhp-enrichment/null/newRelation/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-08f52c24-8e11-4513-a719-beee952de6ee-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-08f52c24-8e11-4513-a719-beee952de6ee-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-2dc8bfd2-49b1-48ae-951b-6febe9db8857-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-2dc8bfd2-49b1-48ae-951b-6febe9db8857-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-3611fa1f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-3611fa1f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-3c4369da-e550-47d3-90a8-5854c6c07c5e-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-3c4369da-e550-47d3-90a8-5854c6c07c5e-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-4332f647-c748-4164-aba1-1e45fbadda99-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-4332f647-c748-4164-aba1-1e45fbadda99-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-810d7e27-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-810d7e27-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a127934e-6383-4d37-97e4-124125abfca5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a127934e-6383-4d37-97e4-124125abfca5-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-beb4081a-16d5-4381-a03b-9f2ed0830707-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-beb4081a-16d5-4381-a03b-9f2ed0830707-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a92a574363a35b56952a13cd4f2d141e75662967 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..2298c84f35b5aa39d6f4462670bb5e978eb563d1 GIT binary patch literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..2298c84f35b5aa39d6f4462670bb5e978eb563d1 GIT binary patch literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..2298c84f35b5aa39d6f4462670bb5e978eb563d1 GIT binary patch literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..2298c84f35b5aa39d6f4462670bb5e978eb563d1 GIT binary patch literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..2298c84f35b5aa39d6f4462670bb5e978eb563d1 GIT binary patch literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..2298c84f35b5aa39d6f4462670bb5e978eb563d1 GIT binary patch literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..2298c84f35b5aa39d6f4462670bb5e978eb563d1 GIT binary patch literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..2298c84f35b5aa39d6f4462670bb5e978eb563d1 GIT binary patch literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00049-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00049-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe56417e97d74ab67f9276c2107723cc38754721 GIT binary patch literal 326 zcmV-M0lEGkiwFP!000000G(1jYr`-Q-TNzy=1?4ilUQ5ml&Ph3D<#O*$ri|xkxnUT zn*ZLD65@2|((!cf(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5Dy(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5Dy(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5Dy(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5Dy(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5Dy(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5Dy(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5Dy(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5DyNy0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5e63538ede2100ae5998b99b4d0bc4cba57fae99 GIT binary patch literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00115-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00115-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f7008b990668e5684da12efdc23ea58cd6df210 GIT binary patch literal 398 zcmV;90df8xiwFP!000000PRvuYa1~Tz4uovI-8>PuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdPuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdPuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQPuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQPuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdPuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQPuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdPuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdI6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&m}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6cfe02812d5b9e72b8204dbc970edbd359d679c4 GIT binary patch literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00192-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00192-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e0f1691ec6b977af3514ba3758ef6fa23058a6b GIT binary patch literal 324 zcmV-K0lWSmiwFP!000000G(1hPs1<}-uV@g=L)5yDXq7PsS^mXA%rgWIkDu}l^>N# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxyN# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxyN# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxyN# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxyN# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxyN# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxyN# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxyN# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxy zurM&b;nT18Fekm6U3W zus2w}=5_9Gr3bLpduAnPJ7DM7b{-K+)4#F@DK;fNZcxG_o{Q4S<2%#i)q(VheDM*NNcX|ch z{^@<-s&8QMxK4Mym0{NN7PGeCNfWvpq9Ll;QRpIsLcMz8sYqHgc)+EPfrJ5dc673PA+r0hzdHU{tq0(29u>b%7J`q7k literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/resOrg/part-00096-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz b/dhp-workflows/dhp-enrichment/null/resOrg/part-00096-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ee2b282a26fcda1d29218e128be371dd41765e5 GIT binary patch literal 184 zcmV;p07w5HiwFP!000000Hw}34ni>uMbVyRq^5A3A1BT>C=o)5V@nJYUyMS)-hqOS z3d&a;y?b*Ty}IHe+`IPExV+dBz?XuVS!419oDzG=h-h6~WNEM2-PE1y_M|gCx-xh8 zgMXV{UDy3_C0jj|5}1%sl>=GmL6OWr37S@ts_@M~NJDPYVoz}pj$TcL6M*_$fp|8zcuby8TUty_0RRB=jZ(A# literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/resOrg/part-00117-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz b/dhp-workflows/dhp-enrichment/null/resOrg/part-00117-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..0389977c081f89118b6305eefb5a039bdd116635 GIT binary patch literal 126 zcmb2|=3sz;xu>o94jJ$;T$r0&kyc=rHz%f9u(Tk@ok>g4(b1`OcQJ$6y?s2R@a`gHR!{`DDFk;2v+i3Xdv dCV8Jq7I8aU_-e&Mz8Utr_4ge7WwQ}zEdbLjG)Djc literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/null/resOrg/part-00140-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz b/dhp-workflows/dhp-enrichment/null/resOrg/part-00140-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..57f1fb89852e4d92e02b9ed0fef7e3e68e292b12 GIT binary patch literal 164 zcmV;V09*ebiwFP!000000G-WC3c@fHh2go&xMK%5N%P!>LlH^t1FFSJt%6whZb4Vz zng8c|!C9VAz~jPWJ=JOMX^>kfhN@sow)sHBIg;gOG~~ph5n~5ic+}x8JDH*E!QwJr z<+6TE$91Jtuhfz>>!oqfNNi0O)M_L!lUaNFhxv1AKgrh-3f@V=E{ABGYq@hlnH{oI S^@ic_-h2RjRboHo0002)#82M< literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 644ac2140..0b4269acd 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -48,6 +48,12 @@ io.github.classgraph classgraph + + eu.dnetlib.dhp + dhp-aggregation + 1.2.4-SNAPSHOT + compile + diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 23e97a97a..522f8b969 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp; +import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -37,6 +38,9 @@ public class PropagationConstant { public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID = "result:organization:instrepo"; public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME = "Propagation of affiliation to result collected from datasources of type institutional repository"; + public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID = "result:organization:semrel"; + public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME = "Propagation of affiliation to result through sematic relations"; + public static final String PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID = "result:project:semrel"; public static final String PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME = "Propagation of result to project through semantic relation"; @@ -49,6 +53,13 @@ public class PropagationConstant { public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; + public static final String ITERATION_ONE = "ExitAtFirstIteration"; + public static final String ITERATION_TWO = "ExitAtSecondIteration"; + public static final String ITERATION_THREE = "ExitAtThirdIteration"; + public static final String ITERATION_FOUR = "ExitAtFourthIteration"; + public static final String ITERATION_FIVE = "ExitAtFifthIteration"; + public static final String ITERATION_NO_PARENT = "ExitAtNoFirstParentReached"; + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final String cfHbforResultQuery = "select distinct r.id, inst.collectedfrom.key cf, inst.hostedby.key hb " @@ -94,6 +105,39 @@ public class PropagationConstant { return pa; } + public static ArrayList getOrganizationRelationPair(String orgId, + String resultId, + String classID, + String className + + ) { + ArrayList newRelations = new ArrayList(); + newRelations + .add( + getRelation( + orgId, + resultId, + ModelConstants.IS_AUTHOR_INSTITUTION_OF, + ModelConstants.RESULT_ORGANIZATION, + ModelConstants.AFFILIATION, + PROPAGATION_DATA_INFO_TYPE, + classID, + className)); + newRelations + .add( + getRelation( + resultId, + orgId, + ModelConstants.HAS_AUTHOR_INSTITUTION, + ModelConstants.RESULT_ORGANIZATION, + ModelConstants.AFFILIATION, + PROPAGATION_DATA_INFO_TYPE, + classID, + className)); + + return newRelations; + } + public static Relation getRelation( String source, String target, diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index dbd3bccfb..50ab997b6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -10,7 +10,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import eu.dnetlib.dhp.KeyValueSet; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; @@ -23,6 +22,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.KeyValueSet; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Datasource; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index be0a4804a..0757ebccd 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -8,7 +8,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Optional; -import eu.dnetlib.dhp.KeyValueSet; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; @@ -19,6 +18,7 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import eu.dnetlib.dhp.KeyValueSet; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -138,30 +138,15 @@ public class SparkResultToOrganizationFromIstRepoJob { String resultId = potentialUpdate.getKey(); organizations .forEach( - orgId -> { - newRelations - .add( - getRelation( - orgId, - resultId, - ModelConstants.IS_AUTHOR_INSTITUTION_OF, - ModelConstants.RESULT_ORGANIZATION, - ModelConstants.AFFILIATION, - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, - PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); - newRelations - .add( - getRelation( - resultId, - orgId, - ModelConstants.HAS_AUTHOR_INSTITUTION, - ModelConstants.RESULT_ORGANIZATION, - ModelConstants.AFFILIATION, - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, - PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); - }); + orgId -> newRelations + .addAll( + getOrganizationRelationPair( + orgId, + resultId, + PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, + PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)) + + ); return newRelations.iterator(); }; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStep.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStep.java deleted file mode 100644 index ed33a6297..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStep.java +++ /dev/null @@ -1,4 +0,0 @@ -package eu.dnetlib.dhp.resulttoorganizationfromsemrel; - -public class ExecStep { -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/Leaves.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/Leaves.java index 602dc1aaa..7984721e8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/Leaves.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/Leaves.java @@ -1,4 +1,16 @@ + package eu.dnetlib.dhp.resulttoorganizationfromsemrel; -public class Leaves { +import java.io.Serializable; + +public class Leaves implements Serializable { + private String value; + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java index 34817d9a9..71b032b2b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java @@ -1,26 +1,32 @@ + package eu.dnetlib.dhp.resulttoorganizationfromsemrel; -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.KeyValueSet; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Relation; - - -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; - -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import scala.Tuple2; +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.io.Serializable; import java.util.*; +import java.util.stream.Collectors; -import static eu.dnetlib.dhp.PropagationConstant.*; +import org.apache.commons.collections.iterators.ArrayListIterator; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.*; +import org.apache.spark.sql.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.sun.tools.internal.ws.processor.model.Model; + +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.PropagationConstant; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Relation; +import net.sf.saxon.expr.instruct.ForEach; +import scala.Tuple2; /** * Searches for all the association between result and organization already existing in the graph @@ -29,90 +35,120 @@ import static eu.dnetlib.dhp.PropagationConstant.*; public class PrepareInfo implements Serializable { - //leggo le relazioni e seleziono quelle fra result ed organizzazioni - //raggruppo per result e salvo - //r => {o1, o2, o3} + // leggo le relazioni e seleziono quelle fra result ed organizzazioni + // raggruppo per result e salvo + // r => {o1, o2, o3} - //leggo le relazioni fra le organizzazioni e creo la gerarchia delle parentele: - //hashMap key organizzazione -> value tutti i suoi padri - // o => {p1, p2} + // leggo le relazioni fra le organizzazioni e creo la gerarchia delle parentele: + // hashMap key organizzazione -> value tutti i suoi padri + // o => {p1, p2} - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final Logger log = LoggerFactory.getLogger(PrepareInfo.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final Logger log = LoggerFactory.getLogger(PrepareInfo.class); - //associa i figli con tutti i loro padri - private static final String relOrgQuery = - "SELECT target key, collect_set(source) as valueSet " + - "FROM relation " + - "WHERE lower(relclass) = '" + ModelConstants.IS_PARENT_OF.toLowerCase() + "' and datainfo.deletedbyinference = false " + - "GROUP BY target"; + // associate orgs with all their parent + private static final String relOrgQuery = "SELECT target key, collect_set(source) as valueSet " + + "FROM relation " + + "WHERE lower(relclass) = '" + ModelConstants.IS_PARENT_OF.toLowerCase() + + "' and datainfo.deletedbyinference = false " + + "GROUP BY target"; - private static final String relResQuery = "SELECT source key, collect_set(target) as valueSet " + - "FROM relation " + - "WHERE lower(relclass) = '" + ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase() + "' and datainfo.deletedbyinference = false " + - "GROUP BY source"; + private static final String relResQuery = "SELECT source key, collect_set(target) as valueSet " + + "FROM relation " + + "WHERE lower(relclass) = '" + ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase() + + "' and datainfo.deletedbyinference = false " + + "GROUP BY source"; + public static void main(String[] args) throws Exception { - public static void prepareChildrenParent(SparkSession spark, String inputPath, String childParentOrganizationPath){ - Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); - relation.createOrReplaceTempView("relation"); + String jsonConfiguration = IOUtils + .toString( + SparkResultToOrganizationFromIstRepoJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json")); - spark - .sql(relOrgQuery) - .as(Encoders.bean(KeyValueSet.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(childParentOrganizationPath); - } + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - public static void prepareResultOrgnization(SparkSession spark, String inputPath, String resultOrganizationPath){ - spark - .sql(relResQuery) - .as(Encoders.bean(KeyValueSet.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(resultOrganizationPath); - } + parser.parseArgument(args); - private static void prepareInfo(SparkSession spark, String inputPath, String childParentOrganizationPath, - String currentIterationPath, String resultOrganizationPath){ - Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); - relation.createOrReplaceTempView("relation"); + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - spark - .sql(relOrgQuery) - .as(Encoders.bean(KeyValueSet.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(childParentOrganizationPath); + String graphPath = parser.get("graphPath"); + log.info("graphPath: {}", graphPath); - spark - .sql(relResQuery) - .as(Encoders.bean(KeyValueSet.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(resultOrganizationPath); + final String leavesPath = parser.get("leavesPath"); + log.info("leavesPath: {}", leavesPath); - Dataset children = spark.sql("Select distinct target from relation where relclass='IsParentOf' and datainfo.deletedbyinference = false").as(Encoders.STRING()); + final String childParentPath = parser.get("childParentPath"); + log.info("childParentPath: {}", childParentPath); - Dataset parent = spark.sql("Select distinct source from relation where relclass='IsParentOf' and datainfo.deletedbyinference = false").as(Encoders.STRING()); + final String resultOrganizationPath = parser.get("resultOrgPath"); + log.info("resultOrganizationPath: {}", resultOrganizationPath); - //prendo dalla join i risultati che hanno solo il lato sinistro: sono foglie - children.joinWith(parent, children.col("_1").equalTo(parent.col("_1")), "left") - .map((MapFunction, String>) value -> { - if (Optional.ofNullable(value._2()).isPresent()) { - return null; - } + SparkConf conf = new SparkConf(); + conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); - return value._1(); - }, Encoders.STRING()).filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .json(currentIterationPath); - } + runWithSparkHiveSession( + conf, + isSparkSessionManaged, + spark -> prepareInfo( + spark, + graphPath, + childParentPath, + leavesPath, + resultOrganizationPath)); + } + + private static void prepareInfo(SparkSession spark, String inputPath, String childParentOrganizationPath, + String currentIterationPath, String resultOrganizationPath) { + Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); + relation.createOrReplaceTempView("relation"); + + spark + .sql(relOrgQuery) + .as(Encoders.bean(KeyValueSet.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(childParentOrganizationPath); + + spark + .sql(relResQuery) + .as(Encoders.bean(KeyValueSet.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(resultOrganizationPath); + + Dataset children = spark + .sql( + "Select distinct target as child from relation where " + + "lower(relclass)='" + ModelConstants.IS_PARENT_OF.toLowerCase() + + "' and datainfo.deletedbyinference = false") + .as(Encoders.STRING()); + + Dataset parent = spark + .sql( + "Select distinct source as parent from relation " + + "where lower(relclass)='" + ModelConstants.IS_PARENT_OF.toLowerCase() + + "' and datainfo.deletedbyinference = false") + .as(Encoders.STRING()); + + // prendo dalla join i risultati che hanno solo il lato sinistro: sono foglie + children + .joinWith(parent, children.col("child").equalTo(parent.col("parent")), "left") + .map((MapFunction, String>) value -> { + if (Optional.ofNullable(value._2()).isPresent()) { + return null; + } + + return value._1(); + }, Encoders.STRING()) + .filter(Objects::nonNull) + .write() + .mode(SaveMode.Overwrite) + .json(currentIterationPath); + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PropagationCounter.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PropagationCounter.java new file mode 100644 index 000000000..788eff0e3 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PropagationCounter.java @@ -0,0 +1,77 @@ + +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +import java.io.Serializable; + +import org.apache.spark.util.LongAccumulator; + +public class PropagationCounter implements Serializable { + private LongAccumulator iterationOne; + private LongAccumulator iterationTwo; + private LongAccumulator iterationThree; + private LongAccumulator iterationFour; + private LongAccumulator iterationFive; + private LongAccumulator notReachedFirstParent; + + public PropagationCounter() { + } + + public PropagationCounter(LongAccumulator iterationOne, LongAccumulator iterationTwo, + LongAccumulator iterationThree, LongAccumulator iterationFour, LongAccumulator iterationFive, + LongAccumulator notReachedFirstParent) { + this.iterationOne = iterationOne; + this.iterationTwo = iterationTwo; + this.iterationThree = iterationThree; + this.iterationFour = iterationFour; + this.iterationFive = iterationFive; + this.notReachedFirstParent = notReachedFirstParent; + } + + public LongAccumulator getIterationOne() { + return iterationOne; + } + + public void setIterationOne(LongAccumulator iterationOne) { + this.iterationOne = iterationOne; + } + + public LongAccumulator getIterationTwo() { + return iterationTwo; + } + + public void setIterationTwo(LongAccumulator iterationTwo) { + this.iterationTwo = iterationTwo; + } + + public LongAccumulator getIterationThree() { + return iterationThree; + } + + public void setIterationThree(LongAccumulator iterationThree) { + this.iterationThree = iterationThree; + } + + public LongAccumulator getIterationFour() { + return iterationFour; + } + + public void setIterationFour(LongAccumulator iterationFour) { + this.iterationFour = iterationFour; + } + + public LongAccumulator getIterationFive() { + return iterationFive; + } + + public void setIterationFive(LongAccumulator iterationFive) { + this.iterationFive = iterationFive; + } + + public LongAccumulator getNotReachedFirstParent() { + return notReachedFirstParent; + } + + public void setNotReachedFirstParent(LongAccumulator notReachedFirstParent) { + this.notReachedFirstParent = notReachedFirstParent; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java index bd1b7803f..e716f8d86 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java @@ -1,29 +1,228 @@ + package eu.dnetlib.dhp.resulttoorganizationfromsemrel; -public class SparkResultToOrganizationFromSemRel { +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.common.Constants.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; - //per ogni figlio nel file delle organizzazioni - //devo fare una roba iterativa che legge info da un file e le cambia via via - //passo 1: creo l'informazione iniale: organizzazioni che non hanno figli con almeno un padre - //ogni organizzazione punta alla lista di padri - //eseguo la propagazione dall'organizzazione figlio all'organizzazione padre - //ricerco nel dataset delle relazioni se l'organizzazione a cui ho propagato ha, a sua volta, dei padri - //e propago anche a quelli e cosi' via fino a che arrivo ad organizzazione senza padre +import java.io.Serializable; +import java.util.Arrays; - //organizationFile: - //f => {p1, p2, ..., pn} - //resultFile - //o => {r1, r2, ... rm} +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.util.LongAccumulator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; - //supponiamo che f => {r1, r2} e che nessuno dei padri abbia gia' l'associazione con questi result - //quindi - //p1 => {r1, r2} - //p2 => {r1, r2} - //pn => {r1, r2} +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.PropagationConstant; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Relation; +public class SparkResultToOrganizationFromSemRel implements Serializable { + private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromSemRel.class); + private static final int MAX_ITERATION = 5; - //mi serve un file con tutta la gerarchia per organizzazioni - //un file con le organizzazioni foglia da joinare con l'altro file - //un file con le associazioni organizzazione -> result forse meglio result -> organization + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + SparkResultToOrganizationFromIstRepoJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String graphPath = parser.get("graphPath"); + log.info("graphPath: {}", graphPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + final String leavesPath = parser.get("leavesPath"); + log.info("leavesPath: {}", leavesPath); + + final String childParentPath = parser.get("childParentPath"); + log.info("childParentPath: {}", childParentPath); + + final String resultOrganizationPath = parser.get("resultOrgPath"); + log.info("resultOrganizationPath: {}", resultOrganizationPath); + + final String workingPath = parser.get("workingDir"); + log.info("workingPath: {}", workingPath); + + SparkConf conf = new SparkConf(); + conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); + + runWithSparkHiveSession( + conf, + isSparkSessionManaged, + spark -> execPropagation( + spark, + leavesPath, + childParentPath, + resultOrganizationPath, + graphPath, + workingPath, + outputPath)); + } + + public static void execPropagation(SparkSession spark, + String leavesPath, + String childParentPath, + String resultOrganizationPath, + String graphPath, + String workingPath, + String outputPath) { + + final LongAccumulator iterationOne = spark.sparkContext().longAccumulator(ITERATION_ONE); + final LongAccumulator iterationTwo = spark.sparkContext().longAccumulator(ITERATION_TWO); + final LongAccumulator iterationThree = spark.sparkContext().longAccumulator(ITERATION_THREE); + final LongAccumulator iterationFour = spark.sparkContext().longAccumulator(ITERATION_FOUR); + final LongAccumulator iterationFive = spark.sparkContext().longAccumulator(ITERATION_FIVE); + final LongAccumulator notReachedFirstParent = spark.sparkContext().longAccumulator(ITERATION_NO_PARENT); + + final PropagationCounter propagationCounter = new PropagationCounter(iterationOne, + iterationTwo, + iterationThree, + iterationFour, + iterationFive, + notReachedFirstParent); + + doPropagate( + spark, leavesPath, childParentPath, resultOrganizationPath, graphPath, + workingPath, outputPath, propagationCounter); + + } + + private static void doPropagate(SparkSession spark, String leavesPath, String childParentPath, + String resultOrganizationPath, String graphPath, String workingPath, String outputPath, + PropagationCounter propagationCounter) { + int iteration = 0; + long leavesCount = 0; + + do { + iteration++; + StepActions + .execStep( + spark, graphPath, workingPath + "/newRelation", + leavesPath, childParentPath, resultOrganizationPath); + StepActions + .prepareForNextStep( + spark, workingPath + "/newRelation", resultOrganizationPath, leavesPath, + childParentPath, workingPath + "/leaves", workingPath + "/resOrg"); + moveOutput(spark, workingPath, leavesPath, resultOrganizationPath); + leavesCount = readPath(spark, leavesPath, Leaves.class).count(); + } while (leavesCount > 0 && iteration < MAX_ITERATION); + + if (leavesCount == 0) { + switch (String.valueOf(iteration)) { + case "1": + propagationCounter.getIterationOne().add(1); + break; + case "2": + propagationCounter.getIterationTwo().add(1); + break; + case "3": + propagationCounter.getIterationThree().add(1); + break; + case "4": + propagationCounter.getIterationFour().add(1); + break; + case "5": + propagationCounter.getIterationFive().add(1); + break; + default: + break; + } + } else { + propagationCounter.getNotReachedFirstParent().add(1); + } + + addNewRelations(spark, workingPath + "/newRelation", outputPath); + } + + private static void moveOutput(SparkSession spark, String workingPath, String leavesPath, + String resultOrganizationPath) { + readPath(spark, workingPath + "/leaves", Leaves.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(leavesPath); + + readPath(spark, workingPath + "/resOrg", KeyValueSet.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(resultOrganizationPath); + + } + + private static void addNewRelations(SparkSession spark, String newRelationPath, String outputPath) { + Dataset relation = readPath(spark, newRelationPath, Relation.class); + + relation + .groupByKey((MapFunction) r -> r.getSource() + r.getTarget(), Encoders.STRING()) + .mapGroups( + (MapGroupsFunction) (k, it) -> it.next(), Encoders.bean(Relation.class)) + .flatMap( + (FlatMapFunction) r -> Arrays + .asList( + r, getRelation( + r.getTarget(), r.getSource(), ModelConstants.IS_AUTHOR_INSTITUTION_OF, + ModelConstants.RESULT_ORGANIZATION, + ModelConstants.AFFILIATION, + PROPAGATION_DATA_INFO_TYPE, + PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, + PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME)) + .iterator() + + , Encoders.bean(Relation.class)) + .write() + + .mode(SaveMode.Append) + .option("compression", "gzip") + .json(outputPath); + } + + // per ogni figlio nel file delle organizzazioni + // devo fare una roba iterativa che legge info da un file e le cambia via via + // passo 1: creo l'informazione iniale: organizzazioni che non hanno figli con almeno un padre + // ogni organizzazione punta alla lista di padri + // eseguo la propagazione dall'organizzazione figlio all'organizzazione padre + // ricerco nel dataset delle relazioni se l'organizzazione a cui ho propagato ha, a sua volta, dei padri + // e propago anche a quelli e cosi' via fino a che arrivo ad organizzazione senza padre + + // organizationFile: + // f => {p1, p2, ..., pn} + // resultFile + // o => {r1, r2, ... rm} + + // supponiamo che f => {r1, r2} e che nessuno dei padri abbia gia' l'associazione con questi result + // quindi + // p1 => {r1, r2} + // p2 => {r1, r2} + // pn => {r1, r2} + + // mi serve un file con tutta la gerarchia per organizzazioni + // un file con le organizzazioni foglia da joinare con l'altro file + // un file con le associazioni organizzazione -> result forse meglio result -> organization + + // eseguo gli step fino a che ho foglie nel set + // quando non ne ho piu' creo relazioni doppio verso per le nuove propagate che ho introdotto } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java new file mode 100644 index 000000000..2d2fd866d --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java @@ -0,0 +1,213 @@ + +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.PropagationConstant.readPath; + +import java.io.Serializable; +import java.util.*; +import java.util.stream.Collectors; + +import org.apache.spark.api.java.function.*; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Relation; +import scala.Tuple2; + +public class StepActions implements Serializable { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final Logger log = LoggerFactory.getLogger(StepActions.class); + + public static void execStep(SparkSession spark, + String graphPath, String newRelationPath, + String leavesPath, String chldParentOrgPath, String resultOrgPath) { + + Dataset relationGraph = readPath(spark, graphPath + "/relation", Relation.class) + .filter( + (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && + r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION)); + // select only the relation source target among those proposed by propagation that are not already existent + getNewRels( + newRelationPath, relationGraph, + getPropagationRelation(spark, leavesPath, chldParentOrgPath, resultOrgPath)); + + } + + public static void prepareForNextStep(SparkSession spark, String selectedRelsPath, String resultOrgPath, + String leavesPath, String chldParentOrgPath, String leavesOutputPath, + String orgOutputPath) { + // use of the parents as new leaves set + changeLeavesSet(spark, leavesPath, chldParentOrgPath, leavesOutputPath); + + // add the new relations obtained from propagation to the keyvalueset result organization + updateResultOrganization( + spark, resultOrgPath, readPath(spark, selectedRelsPath, Relation.class), orgOutputPath); + } + + private static void updateResultOrganization(SparkSession spark, String resultOrgPath, + Dataset selectedRels, String outputPath) { + Dataset resultOrg = readPath(spark, resultOrgPath, KeyValueSet.class); + resultOrg + .joinWith( + selectedRels, resultOrg + .col("key") + .equalTo(selectedRels.col("source")), + "left") + .groupByKey((MapFunction, String>) mf -> mf._1().getKey(), Encoders.STRING()) + .mapGroups((MapGroupsFunction, KeyValueSet>) (key, it) -> { + Tuple2 first = it.next(); + if (!Optional.ofNullable(first._2()).isPresent()) { + return first._1(); + } + KeyValueSet ret = new KeyValueSet(); + ret.setKey(first._1().getKey()); + HashSet hs = new HashSet<>(); + hs.addAll(first._1().getValueSet()); + hs.add(first._2().getTarget()); + it.forEachRemaining(rel -> hs.add(rel._2().getTarget())); + ArrayList orgs = new ArrayList<>(); + orgs.addAll(hs); + ret.setValueSet(orgs); + return ret; + }, Encoders.bean(KeyValueSet.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); + } + + private static void changeLeavesSet(SparkSession spark, String leavesPath, String chldParentOrgPath, + String leavesOutputPath) { + Dataset childParent = readPath(spark, chldParentOrgPath, KeyValueSet.class); + Dataset leaves = readPath(spark, leavesPath, Leaves.class); + + childParent.createOrReplaceTempView("childParent"); + leaves.createOrReplaceTempView("leaves"); + + spark + .sql( + "SELECT distinct parent as value " + + "FROM leaves " + + "JOIN (SELECT key, parent " + + " FROM childParent " + + " LATERAL VIEW explode(valueSet) kv as parent) tmp " + + "ON value = key ") + .as(Encoders.bean(Leaves.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(leavesOutputPath); + } + + @NotNull + private static void getNewRels(String newRelationPath, Dataset relationDataset, + Dataset newRels) { + // selects new, not already existent relations + // union of new propagation relations to the relation set + // grouping from sourcetarget (we are sure the only relations are those from result to organization by + // construction of the set) + // if at least one relation in the set was harvested no new relation will be returned + + relationDataset + .union(newRels) + .groupByKey((MapFunction) r -> r.getSource() + r.getTarget(), Encoders.STRING()) + .mapGroups((MapGroupsFunction) (k, it) -> { + + ArrayList relationList = new ArrayList<>(); + relationList.add(it.next()); + it.forEachRemaining(rel -> relationList.add(rel)); + + if (relationList + .stream() + .filter( + rel -> rel + .getDataInfo() + .getProvenanceaction() + .getClassname() + .equals(ModelConstants.HARVESTED)) + .count() > 0) { + return null; + } + + return new ObjectMapper().writeValueAsString(relationList.get(0)); + + }, Encoders.STRING()) + .filter(Objects::nonNull) + .map( + (MapFunction) r -> new ObjectMapper().readValue(r, Relation.class), + Encoders.bean(Relation.class)) + .write() + .mode(SaveMode.Append) + .option("compression", "gzip") + .json(newRelationPath); + + } + + // get the possible relations from propagation + private static Dataset getPropagationRelation(SparkSession spark, + String leavesPath, + String chldParentOrgPath, + String resultOrgPath) { + + Dataset childParent = readPath(spark, chldParentOrgPath, KeyValueSet.class); + Dataset resultOrg = readPath(spark, resultOrgPath, KeyValueSet.class); + Dataset leaves = readPath(spark, leavesPath, Leaves.class); + + childParent.createOrReplaceTempView("childParent"); + resultOrg.createOrReplaceTempView("resultOrg"); + leaves.createOrReplaceTempView("leaves"); + + Dataset resultParent = spark + .sql( + "SELECT resId as key, " + + "collect_set(parent) valueSet " + + "FROM (SELECT key as child, parent " + + " FROM childParent " + + " LATERAL VIEW explode(valueSet) ks as parent) as cp " + + "JOIN leaves " + + "ON leaves.value = cp.child " + + "JOIN (" + + "SELECT key as resId, org " + + "FROM resultOrg " + + "LATERAL VIEW explode (valueSet) ks as org ) as ro " + + "ON leaves.value = ro.org " + + "GROUP BY resId") + .as(Encoders.bean(KeyValueSet.class)); + + // resultParent.foreach((ForeachFunction)kv -> + // System.out.println(OBJECT_MAPPER.writeValueAsString(kv))); + // create new relations from result to organization for each result linked to a leaf + Dataset tmp = resultParent + .flatMap( + (FlatMapFunction) v -> v + .getValueSet() + .stream() + .map( + orgId -> getRelation( + v.getKey(), + orgId, + ModelConstants.HAS_AUTHOR_INSTITUTION, + ModelConstants.RESULT_ORGANIZATION, + ModelConstants.AFFILIATION, + PROPAGATION_DATA_INFO_TYPE, + PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, + PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME)) + .collect(Collectors.toList()) + .iterator(), + Encoders.bean(Relation.class)); + tmp.foreach((ForeachFunction) r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + return tmp; + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json new file mode 100644 index 000000000..baa8ba333 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json @@ -0,0 +1,38 @@ +[ + { + "paramName":"gp", + "paramLongName":"graphPath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName":"lp", + "paramLongName":"leavesPath", + "paramDescription": "true if the new version of the graph must be saved", + "paramRequired": false + }, + { + "paramName":"cp", + "paramLongName":"childParentPath", + "paramDescription": "path where to store/find association from datasource and organization", + "paramRequired": true + }, + { + "paramName":"rp", + "paramLongName":"resultOrgPath", + "paramDescription": "path where to store/find already linked results and organizations", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": false + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json new file mode 100644 index 000000000..bd7bb50f9 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json @@ -0,0 +1,50 @@ +[ + { + "paramName":"gp", + "paramLongName":"graphPath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName":"lp", + "paramLongName":"leavesPath", + "paramDescription": "true if the new version of the graph must be saved", + "paramRequired": false + }, + { + "paramName":"cp", + "paramLongName":"childParentPath", + "paramDescription": "path where to store/find association from datasource and organization", + "paramRequired": true + }, + { + "paramName":"rp", + "paramLongName":"resultOrgPath", + "paramDescription": "path where to store/find already linked results and organizations", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": false + }, + { + "paramName": "wd", + "paramLongName": "workingDir", + "paramDescription": "true if it is a test running", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/config-default.xml new file mode 100644 index 000000000..2744ea92b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml new file mode 100644 index 000000000..32307e13d --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml @@ -0,0 +1,192 @@ + + + + sourcePath + the source path + + + outputPath + sets the outputPath + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${wf:conf('resumeFrom') eq 'PrepareInfo'} + + + + + + + + + + + + + + + + + + + + + + + + + + + ${nameNode}/${sourcePath}/relation + ${nameNode}/${outputPath}/relation + + + + + + + + ${nameNode}/${sourcePath}/publication + ${nameNode}/${outputPath}/publication + + + + + + + + ${nameNode}/${sourcePath}/dataset + ${nameNode}/${outputPath}/dataset + + + + + + + + ${nameNode}/${sourcePath}/otherresearchproduct + ${nameNode}/${outputPath}/otherresearchproduct + + + + + + + + ${nameNode}/${sourcePath}/software + ${nameNode}/${outputPath}/software + + + + + + + + ${nameNode}/${sourcePath}/organization + ${nameNode}/${outputPath}/organization + + + + + + + + ${nameNode}/${sourcePath}/project + ${nameNode}/${outputPath}/project + + + + + + + + ${nameNode}/${sourcePath}/datasource + ${nameNode}/${outputPath}/datasource + + + + + + + + + + + yarn + cluster + PrepareResultOrganizationAssociation + eu.dnetlib.dhp.resulttoorganizationfromsemrel.PrepareInfo + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --graphPath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --leavesPath${workingDir}/preparedInfo/leavesPath + --childParentPath${workingDir}/preparedInfo/childParentPath + --resultOrgPath${workingDir}/preparedInfo/resultOrgPath + + + + + + + + yarn + cluster + resultToOrganizationFromSemRel + eu.dnetlib.dhp.resulttoorganizationfromsemrel.SparkResultToOrganizationFromSemRel + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --graphPath${sourcePath} + --outputPath${outputPath}/relation + --leavesPath${workingDir}/preparedInfo/leavesPath + --leavesPath${workingDir}/preparedInfo/childParentPath + --resultOrgPath${workingDir}/preparedInfo/resultOrgPath + --hive_metastore_uris${hive_metastore_uris} + --workingDir${workingDir}/working + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStepTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStepTest.java deleted file mode 100644 index cd55a0688..000000000 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/ExecStepTest.java +++ /dev/null @@ -1,4 +0,0 @@ -package eu.dnetlib.dhp.resulttoorganizationfromsemrel; - -public class ExecStepTest { -} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java index 1d3498d04..973c663b9 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java @@ -1,10 +1,13 @@ package eu.dnetlib.dhp.resulttoorganizationfromsemrel; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.KeyValueSet; -import eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob; -import eu.dnetlib.dhp.schema.oaf.Relation; +import static eu.dnetlib.dhp.PropagationConstant.readPath; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -22,10 +25,11 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob; +import eu.dnetlib.dhp.schema.oaf.Relation; public class PrepareInfoJobTest { @@ -65,259 +69,484 @@ public class PrepareInfoJobTest { spark.stop(); } - @Test - public void childParentTest1() { + public void childParentTest1() throws Exception { - PrepareInfo.prepareChildrenParent(spark, getClass() - .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1") - .getPath(), workingDir.toString() + "/childParentOrg/"); + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + + }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile( workingDir.toString() + "/childParentOrg/") - .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class)); + .textFile(workingDir.toString() + "/childParentOrg/") + .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class)); Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(KeyValueSet.class)); Assertions.assertEquals(6, verificationDs.count()); - Assertions.assertEquals(1, verificationDs.filter("key = '20|dedup_wf_001::2899e571609779168222fdeb59cb916d'") - .collectAsList().get(0).getValueSet().size()); - Assertions.assertEquals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f", - verificationDs.filter("key = '20|dedup_wf_001::2899e571609779168222fdeb59cb916d'") - .collectAsList().get(0).getValueSet().get(0)); + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertEquals( + "20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f", + verificationDs + .filter("key = '20|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .get(0)); - Assertions.assertEquals(2, verificationDs.filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") - .collectAsList().get(0).getValueSet().size()); - Assertions.assertTrue(verificationDs.filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") - .collectAsList().get(0).getValueSet().contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); - Assertions.assertTrue(verificationDs.filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") - .collectAsList().get(0).getValueSet().contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertEquals( + 2, verificationDs + .filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); - Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") - .collectAsList().get(0).getValueSet().size()); - Assertions.assertTrue(verificationDs.filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") - .collectAsList().get(0).getValueSet().contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); - Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") - .collectAsList().get(0).getValueSet().size()); - Assertions.assertTrue(verificationDs.filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") - .collectAsList().get(0).getValueSet().contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); - Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") - .collectAsList().get(0).getValueSet().size()); - Assertions.assertTrue(verificationDs.filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") - .collectAsList().get(0).getValueSet().contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); - Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") - .collectAsList().get(0).getValueSet().size()); - Assertions.assertTrue(verificationDs.filter("key = '20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1'") - .collectAsList().get(0).getValueSet().contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + + verificationDs + .foreach((ForeachFunction) v -> System.out.println(OBJECT_MAPPER.writeValueAsString(v))); } - - @Test - public void childParentTest2() { + public void childParentTest2() throws Exception { - PrepareInfo.prepareChildrenParent(spark, getClass() - .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2") - .getPath(), workingDir.toString() + "/childParentOrg/"); + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile( workingDir.toString() + "/childParentOrg/") - .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class)); + .textFile(workingDir.toString() + "/childParentOrg/") + .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class)); Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(KeyValueSet.class)); Assertions.assertEquals(5, verificationDs.count()); - Assertions.assertEquals(0, verificationDs.filter("key = '20|dedup_wf_001::2899e571609779168222fdeb59cb916d'").count()); + Assertions + .assertEquals( + 0, verificationDs.filter("key = '20|dedup_wf_001::2899e571609779168222fdeb59cb916d'").count()); - Assertions.assertEquals(1, verificationDs.filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") - .collectAsList().get(0).getValueSet().size()); - Assertions.assertEquals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f", - verificationDs.filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") - .collectAsList().get(0).getValueSet().get(0)); + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertEquals( + "20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f", + verificationDs + .filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .get(0)); - Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") - .collectAsList().get(0).getValueSet().size()); - Assertions.assertTrue(verificationDs.filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") - .collectAsList().get(0).getValueSet().contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); - Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") - .collectAsList().get(0).getValueSet().size()); - Assertions.assertTrue(verificationDs.filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") - .collectAsList().get(0).getValueSet().contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); - Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") - .collectAsList().get(0).getValueSet().size()); - Assertions.assertTrue(verificationDs.filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") - .collectAsList().get(0).getValueSet().contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); - Assertions.assertEquals(1, verificationDs.filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") - .collectAsList().get(0).getValueSet().size()); - Assertions.assertTrue(verificationDs.filter("key = '20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1'") - .collectAsList().get(0).getValueSet().contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); } - @Test - public void resultOrganizationTest1(){ + public void resultOrganizationTest1() throws Exception { - } - - /** - * All the possible updates will produce a new relation. No relations are already linked in the grpha - * - * @throws Exception - */ - @Test - void UpdateTenTest() throws Exception { - final String potentialUpdatePath = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates") - .getPath(); - final String alreadyLinkedPath = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") - .getPath(); - SparkResultToProjectThroughSemRelJob + PrepareInfo .main( new String[] { - "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") + .getPath(), "-hive_metastore_uris", "", - "-saveGraph", "true", - "-outputPath", workingDir.toString() + "/relation", - "-potentialUpdatePath", potentialUpdatePath, - "-alreadyLinkedPath", alreadyLinkedPath, + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + }); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/resultOrganization/") + .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class)); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(KeyValueSet.class)); + + Assertions.assertEquals(5, verificationDs.count()); + + Assertions + .assertEquals( + 2, verificationDs + .filter("key = '50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + verificationDs + .filter("key = '50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertEquals( + 2, verificationDs + .filter("key = '50|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '50|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0")); + Assertions + .assertTrue( + verificationDs + .filter("key = '50|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '50|doajarticles::03748bcb5d754c951efec9700e18a56d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '50|doajarticles::03748bcb5d754c951efec9700e18a56d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '50|openaire____::ec653e804967133b9436fdd30d3ff51d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '50|openaire____::ec653e804967133b9436fdd30d3ff51d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '50|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '50|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1")); + + verificationDs + .foreach((ForeachFunction) v -> System.out.println(OBJECT_MAPPER.writeValueAsString(v))); + + } + + @Test + public void foundLeavesTest1() throws Exception { +// PrepareInfo.prepareInfo(spark, getClass() +// .getResource( +// "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") +// .getPath(), workingDir.toString() + "/childParentOrg/", workingDir.toString() + "/currentIteration/",workingDir.toString() + "/resultOrganization/"); + + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + + }); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/currentIteration/") + .map(item -> OBJECT_MAPPER.readValue(item, String.class)); + + Assertions.assertEquals(0, tmp.count()); + + } + + @Test + public void foundLeavesTest2() throws Exception { + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + + }); +// PrepareInfo.prepareInfo(spark, getClass() +// .getResource( +// "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1") +// .getPath(), workingDir.toString() + "/childParentOrg/", workingDir.toString() + "/currentIteration/",workingDir.toString() + "/resultOrganization/"); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/currentIteration/") + .map(item -> OBJECT_MAPPER.readValue(item, Leaves.class)); - // got 20 new relations because "produces" and "isProducedBy" are added - Assertions.assertEquals(10, tmp.count()); + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Leaves.class)); - Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); - - Assertions.assertEquals(5, verificationDs.filter("relClass = 'produces'").count()); - Assertions.assertEquals(5, verificationDs.filter("relClass = 'isProducedBy'").count()); + Assertions.assertEquals(3, verificationDs.count()); Assertions .assertEquals( - 5, - verificationDs - .filter( - (FilterFunction) r -> r.getSource().startsWith("50") - && r.getTarget().startsWith("40") - && r.getRelClass().equals("isProducedBy")) + 1, verificationDs + .filter("value = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") .count()); - Assertions - .assertEquals( - 5, - verificationDs - .filter( - (FilterFunction) r -> r.getSource().startsWith("40") - && r.getTarget().startsWith("50") - && r.getRelClass().equals("produces")) - .count()); - - verificationDs.createOrReplaceTempView("temporary"); Assertions .assertEquals( - 10, - spark - .sql( - "Select * from temporary where datainfo.inferenceprovenance = 'propagation'") + 1, verificationDs + .filter("value = '20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1'") .count()); + + Assertions + .assertEquals( + 1, verificationDs + .filter("value = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .count()); + + verificationDs.foreach((ForeachFunction) l -> System.out.println(OBJECT_MAPPER.writeValueAsString(l))); + } - /** - * One of the relations in the possible updates is already linked to the project in the graph. All the others are - * not. There will be 9 new associations leading to 18 new relations - * - * @throws Exception - */ - @Test - void UpdateMixTest() throws Exception { - final String potentialUpdatepath = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates") - .getPath(); - final String alreadyLinkedPath = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") - .getPath(); - SparkResultToProjectThroughSemRelJob - .main( - new String[] { - "-isTest", Boolean.TRUE.toString(), - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-hive_metastore_uris", "", - "-saveGraph", "true", - "-outputPath", workingDir.toString() + "/relation", - "-potentialUpdatePath", potentialUpdatepath, - "-alreadyLinkedPath", alreadyLinkedPath, - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); - - // JavaRDD tmp = sc.textFile("/tmp/relation") - // .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); - - // got 20 new relations because "produces" and "isProducedBy" are added - Assertions.assertEquals(8, tmp.count()); - - Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); - - Assertions.assertEquals(4, verificationDs.filter("relClass = 'produces'").count()); - Assertions.assertEquals(4, verificationDs.filter("relClass = 'isProducedBy'").count()); - - Assertions - .assertEquals( - 4, - verificationDs - .filter( - (FilterFunction) r -> r.getSource().startsWith("50") - && r.getTarget().startsWith("40") - && r.getRelClass().equals("isProducedBy")) - .count()); - Assertions - .assertEquals( - 4, - verificationDs - .filter( - (FilterFunction) r -> r.getSource().startsWith("40") - && r.getTarget().startsWith("50") - && r.getRelClass().equals("produces")) - .count()); - - verificationDs.createOrReplaceTempView("temporary"); - - Assertions - .assertEquals( - 8, - spark - .sql( - "Select * from temporary where datainfo.inferenceprovenance = 'propagation'") - .count()); - } } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkJobTest.java new file mode 100644 index 000000000..5a4b19a24 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkJobTest.java @@ -0,0 +1,323 @@ + +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +import static eu.dnetlib.dhp.PropagationConstant.isSparkSessionManaged; +import static eu.dnetlib.dhp.PropagationConstant.readPath; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.ForeachFunction; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.PropagationConstant; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Relation; + +public class SparkJobTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory.getLogger(SparkJobTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(StepActionsTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(PrepareInfoJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareInfoJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void completeExecution() throws Exception { + + final String graphPath = getClass() + .getResource("/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep") + .getPath(); + final String leavesPath = getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + .getPath(); + final String childParentPath = getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + .getPath(); + + final String resultOrgPath = getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") + .getPath(); + + readPath(spark, leavesPath, Leaves.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/leavesInput"); + + readPath(spark, resultOrgPath, KeyValueSet.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/orgsInput"); + + SparkResultToOrganizationFromSemRel + + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", graphPath, + "-hive_metastore_uris", "", + "-outputPath", workingDir.toString() + "/relation", + "-leavesPath", workingDir.toString() + "/leavesInput", + "-resultOrgPath", workingDir.toString() + "/orgsInput", + "-childParentPath", childParentPath, + "-workingDir", workingDir.toString() + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + Assertions.assertEquals(18, tmp.count()); + tmp.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType())); + tmp.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType())); + tmp + .foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance())); + tmp + .foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, + r.getDataInfo().getProvenanceaction().getClassid())); + tmp + .foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME, + r.getDataInfo().getProvenanceaction().getClassname())); + tmp + .foreach( + r -> Assertions + .assertEquals( + "0.85", + r.getDataInfo().getTrust())); + + Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("50|")).count()); + tmp + .filter(r -> r.getSource().substring(0, 3).equals("50|")) + .foreach(r -> Assertions.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass())); + Assertions + .assertEquals( + 2, tmp.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); + Assertions + .assertEquals( + 3, tmp.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); + Assertions + .assertEquals( + 2, tmp.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); + Assertions + .assertEquals( + 1, tmp.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); + Assertions + .assertEquals( + 1, tmp.filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); + + Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("20|")).count()); + tmp + .filter(r -> r.getSource().substring(0, 3).equals("20|")) + .foreach(r -> Assertions.assertEquals(ModelConstants.IS_AUTHOR_INSTITUTION_OF, r.getRelClass())); + Assertions + .assertEquals( + 1, tmp.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); + Assertions + .assertEquals( + 1, tmp.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); + Assertions + .assertEquals( + 2, tmp.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); + Assertions + .assertEquals( + 2, tmp.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); + Assertions + .assertEquals( + 3, tmp.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> r.getTarget()) + .collect() + .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> r.getTarget()) + .collect() + .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActionsTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActionsTest.java new file mode 100644 index 000000000..5c715f3b9 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActionsTest.java @@ -0,0 +1,411 @@ + +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.ForeachFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.PropagationConstant; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Relation; + +public class StepActionsTest { + + private static final Logger log = LoggerFactory.getLogger(StepActionsTest.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(StepActionsTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(PrepareInfoJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareInfoJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void execStepTest() { + + StepActions + .execStep( + spark, getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/") + .getPath(), + workingDir.toString() + "/newRelationPath", + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") + .getPath()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/newRelationPath") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + Assertions.assertEquals(4, tmp.count()); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + verificationDs + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals("propagation", r.getDataInfo().getInferenceprovenance())); + + verificationDs + .foreach((ForeachFunction) r -> Assertions.assertEquals("0.85", r.getDataInfo().getTrust())); + + verificationDs + .foreach((ForeachFunction) r -> Assertions.assertEquals("50|", r.getSource().substring(0, 3))); + + verificationDs + .foreach((ForeachFunction) r -> Assertions.assertEquals("20|", r.getTarget().substring(0, 3))); + + verificationDs + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass())); + + verificationDs + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType())); + + verificationDs + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals(ModelConstants.AFFILIATION, r.getSubRelType())); + + verificationDs + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, + r.getDataInfo().getProvenanceaction().getClassid())); + + verificationDs + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME, + r.getDataInfo().getProvenanceaction().getClassname())); + + verificationDs + .filter( + (FilterFunction) r -> r + .getSource() + .equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074", r.getTarget())); + + verificationDs + .filter( + (FilterFunction) r -> r + .getSource() + .equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f", r.getTarget())); + + Assertions + .assertEquals( + 2, + verificationDs + .filter( + (FilterFunction) r -> r + .getSource() + .equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .count()); + + Assertions + .assertEquals( + 1, + verificationDs + .filter( + (FilterFunction) r -> r + .getSource() + .equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d") && + r.getTarget().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .count()); + + Assertions + .assertEquals( + 1, + verificationDs + .filter( + (FilterFunction) r -> r + .getSource() + .equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d") && + r.getTarget().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .count()); + + tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + } + + @Test + public void prepareForNextStepLeavesTest() { + + StepActions + .prepareForNextStep( + spark, + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + .getPath(), + workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs"); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/tempLeaves") + .map(item -> OBJECT_MAPPER.readValue(item, Leaves.class)); + + Assertions.assertEquals(3, tmp.count()); + + Assertions + .assertEquals( + 1, tmp.filter(l -> l.getValue().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); + + Assertions + .assertEquals( + 1, tmp.filter(l -> l.getValue().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); + + Assertions + .assertEquals( + 1, tmp.filter(l -> l.getValue().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); + + } + + @Test + public void prepareFonNextStepOrgTest() { + StepActions + .prepareForNextStep( + spark, + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + .getPath(), + workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs"); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/tempOrgs") + .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class)); + + Assertions.assertEquals(5, tmp.count()); + + Assertions + .assertEquals( + 1, tmp + .filter(kv -> kv.getKey().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .collect() + .get(0) + .getValueSet() + .size()); + Assertions + .assertEquals( + "20|doajarticles::1cae0b82b56ccd97c2db1f698def7074", + tmp + .filter(kv -> kv.getKey().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .collect() + .get(0) + .getValueSet() + .get(0)); + + Assertions + .assertEquals( + 1, tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .collect() + .get(0) + .getValueSet() + .size()); + Assertions + .assertEquals( + "20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f", + tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .collect() + .get(0) + .getValueSet() + .get(0)); + + Assertions + .assertEquals( + 4, tmp + .filter(kv -> kv.getKey().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .collect() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .collect() + .get(0) + .getValueSet() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .collect() + .get(0) + .getValueSet() + .contains("20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0")); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .collect() + .get(0) + .getValueSet() + .contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .collect() + .get(0) + .getValueSet() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + Assertions + .assertEquals( + 2, tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .collect() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .collect() + .get(0) + .getValueSet() + .contains("20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1")); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .collect() + .get(0) + .getValueSet() + .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + + Assertions + .assertEquals( + 3, tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .collect() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .collect() + .get(0) + .getValueSet() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .collect() + .get(0) + .getValueSet() + .contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .collect() + .get(0) + .getValueSet() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + } +} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/relation new file mode 100644 index 000000000..32b816ef7 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/relation @@ -0,0 +1,4 @@ +{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.85","inferenceprovenance":"propagation","provenanceaction":{"classid":"result:organization:semrel","classname":"Propagation of affiliation to result through sematic relations","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultOrganization","subRelType":"affiliation","relClass":"hasAuthorInstitution","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","validated":false,"validationDate":null,"properties":[]} +{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.85","inferenceprovenance":"propagation","provenanceaction":{"classid":"result:organization:semrel","classname":"Propagation of affiliation to result through sematic relations","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultOrganization","subRelType":"affiliation","relClass":"hasAuthorInstitution","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","validated":false,"validationDate":null,"properties":[]} +{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.85","inferenceprovenance":"propagation","provenanceaction":{"classid":"result:organization:semrel","classname":"Propagation of affiliation to result through sematic relations","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultOrganization","subRelType":"affiliation","relClass":"hasAuthorInstitution","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","validated":false,"validationDate":null,"properties":[]} +{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.85","inferenceprovenance":"propagation","provenanceaction":{"classid":"result:organization:semrel","classname":"Propagation of affiliation to result through sematic relations","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultOrganization","subRelType":"affiliation","relClass":"hasAuthorInstitution","source":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","validated":false,"validationDate":null,"properties":[]} \ No newline at end of file From b9d124bb7cac298304fa37a4e7a74afe30646a53 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 13:55:37 +0100 Subject: [PATCH 004/176] [Enrichment: Propagation through parent-child relationships] Added counters, and changed constraint to verify if filtering out the relation (from classname = harvested to classid != propagation) --- .../PrepareInfo.java | 5 --- .../SparkResultToOrganizationFromSemRel.java | 31 +++---------------- .../StepActions.java | 6 ++-- .../oozie_app/workflow.xml | 2 +- 4 files changed, 8 insertions(+), 36 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java index 71b032b2b..732ba2c2d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java @@ -6,9 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.io.Serializable; import java.util.*; -import java.util.stream.Collectors; -import org.apache.commons.collections.iterators.ArrayListIterator; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.*; @@ -17,15 +15,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import com.sun.tools.internal.ws.processor.model.Model; import eu.dnetlib.dhp.KeyValueSet; -import eu.dnetlib.dhp.PropagationConstant; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; -import net.sf.saxon.expr.instruct.ForEach; import scala.Tuple2; /** diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java index e716f8d86..a900fe0d8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.resulttoorganizationfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.Constants.*; + import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.io.Serializable; @@ -22,12 +22,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.KeyValueSet; -import eu.dnetlib.dhp.PropagationConstant; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; + public class SparkResultToOrganizationFromSemRel implements Serializable { private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromSemRel.class); private static final int MAX_ITERATION = 5; @@ -113,7 +114,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { String resultOrganizationPath, String graphPath, String workingPath, String outputPath, PropagationCounter propagationCounter) { int iteration = 0; - long leavesCount = 0; + long leavesCount; do { iteration++; @@ -199,30 +200,6 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { .json(outputPath); } - // per ogni figlio nel file delle organizzazioni - // devo fare una roba iterativa che legge info da un file e le cambia via via - // passo 1: creo l'informazione iniale: organizzazioni che non hanno figli con almeno un padre - // ogni organizzazione punta alla lista di padri - // eseguo la propagazione dall'organizzazione figlio all'organizzazione padre - // ricerco nel dataset delle relazioni se l'organizzazione a cui ho propagato ha, a sua volta, dei padri - // e propago anche a quelli e cosi' via fino a che arrivo ad organizzazione senza padre - // organizationFile: - // f => {p1, p2, ..., pn} - // resultFile - // o => {r1, r2, ... rm} - - // supponiamo che f => {r1, r2} e che nessuno dei padri abbia gia' l'associazione con questi result - // quindi - // p1 => {r1, r2} - // p2 => {r1, r2} - // pn => {r1, r2} - - // mi serve un file con tutta la gerarchia per organizzazioni - // un file con le organizzazioni foglia da joinare con l'altro file - // un file con le associazioni organizzazione -> result forse meglio result -> organization - - // eseguo gli step fino a che ho foglie nel set - // quando non ne ho piu' creo relazioni doppio verso per le nuove propagate che ho introdotto } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java index 2d2fd866d..affe1f56f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java @@ -131,11 +131,11 @@ public class StepActions implements Serializable { if (relationList .stream() .filter( - rel -> rel + rel -> !rel .getDataInfo() .getProvenanceaction() - .getClassname() - .equals(ModelConstants.HARVESTED)) + .getClassid() + .equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID)) .count() > 0) { return null; } diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml index 32307e13d..e62ce0f5a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml @@ -176,7 +176,7 @@ --graphPath${sourcePath} --outputPath${outputPath}/relation --leavesPath${workingDir}/preparedInfo/leavesPath - --leavesPath${workingDir}/preparedInfo/childParentPath + --childParentPath${workingDir}/preparedInfo/childParentPath --resultOrgPath${workingDir}/preparedInfo/resultOrgPath --hive_metastore_uris${hive_metastore_uris} --workingDir${workingDir}/working From 0857849a86f6dc1207c27a8468e33e368a747fb3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 5 Nov 2021 11:02:37 +0100 Subject: [PATCH 005/176] [Graph DUMP] Remove dump of measure until it will be clear where to put it (at the level of result or at the level of the instance) --- .../dhp/oa/graph/dump/ResultMapper.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 49468540d..9e2212f4c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -43,15 +43,15 @@ public class ResultMapper implements Serializable { try { addTypeSpecificInformation(out, input, ort); - Optional> mes = Optional.ofNullable(input.getMeasures()); - if (mes.isPresent()) { - List measure = new ArrayList<>(); - mes - .get() - .forEach( - m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); - out.setMeasures(measure); - } +// Optional> mes = Optional.ofNullable(input.getMeasures()); +// if (mes.isPresent()) { +// List measure = new ArrayList<>(); +// mes +// .get() +// .forEach( +// m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); +// out.setMeasures(measure); +// } Optional .ofNullable(input.getAuthor()) From c10ff6928c67af82cd2b04eb6232608a1e4247a6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 5 Nov 2021 11:36:21 +0100 Subject: [PATCH 006/176] [Graph DUMP] add schema of the dump related to the model as in dhp-schemas.2.8.31. Note the measere element at the level of the result has been removed because of issues on where to display it: at the level of the result or at the level of the entity --- .../community_infrastructure_schema.json | 35 ++ .../graph/dump/schemas/datasource_schema.json | 192 ++++++++++ .../dump/schemas/organization_schema.json | 57 +++ .../oa/graph/dump/schemas/project_schema.json | 119 ++++++ .../graph/dump/schemas/relation_schema.json | 68 ++++ .../dump/{ => schemas}/result_schema.json | 361 ++++++------------ 6 files changed, 596 insertions(+), 236 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{ => schemas}/result_schema.json (50%) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json new file mode 100644 index 000000000..727432a67 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json @@ -0,0 +1,35 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "acronym": { + "type": "string", + "description": "The acronym of the community" + }, + "description": { + "type": "string", + "description": "Description of the research community/research infrastructure" + }, + "id": { + "type": "string", + "description": "OpenAIRE id of the research community/research infrastructure" + }, + "name": { + "type": "string", + "description": "The long name of the community" + }, + "subject": { + "description": "Only for research communities: the list of the subjects associated to the research community", + "type": "array", + "items": {"type": "string"} + }, + "type": { + "type": "string", + "description": "One of {Research Community, Research infrastructure}" + }, + "zenodo_community": { + "type": "string", + "description": "The URL of the Zenodo community associated to the Research community/Research infrastructure" + } + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json new file mode 100644 index 000000000..c416f2320 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json @@ -0,0 +1,192 @@ +{ + "$schema":"http://json-schema.org/draft-07/schema#", + "definitions": { + "ControlledField": { + "type": "object", + "properties": { + "scheme": { + "type": "string" + }, + "value": { + "type": "string" + } + }, + "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" + } + }, + "type":"object", + "properties": { + "accessrights": { + "type": "string", + "description": "Type of access to the data source, as defined by re3data.org. Possible values: {open, restricted, closed}" + }, + "certificates": { + "type": "string", + "description": "The certificate, seal or standard the data source complies with. As defined by re3data.org." + }, + "citationguidelineurl": { + "type": "string", + "description":"The URL of the data source providing information on how to cite its items. As defined by re3data.org." + }, + "contenttypes": { + "description": "Types of content in the data source, as defined by OpenDOAR", + "type": "array", + "items": { + "type": "string" + } + }, + "databaseaccessrestriction": { + "type": "string", + "description": "Access restrinctions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}" + }, + "datasourcetype": { + "allOf": [ + { + "$ref": "#/definitions/ControlledField" + }, + { + "description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies" + } + ] + }, + "datauploadrestriction": { + "type": "string", + "description": "Upload restrictions applied by the datasource, as defined by re3data.org. One of {feeRequired, registration, other}" + }, + "dateofvalidation": { + "type": "string", + "description": "The date of last validation against the OpenAIRE guidelines for the datasource records" + }, + "description": { + "type": "string" + }, + "englishname": { + "type": "string", + "description": "The English name of the datasource" + }, + "id": { + "type": "string", + "description": "The OpenAIRE id of the data source" + }, + "journal": { + "type": "object", + "properties": { + "conferencedate": { + "type": "string" + }, + "conferenceplace": { + "type": "string" + }, + "edition": { + "type": "string" + }, + "ep": { + "type": "string", + "description": "End page" + }, + "iss": { + "type": "string", + "description": "Issue number" + }, + "issnLinking": { + "type": "string" + }, + "issnOnline": { + "type": "string" + }, + "issnPrinted": { + "type": "string" + }, + "name": { + "type": "string" + }, + "sp": { + "type": "string", + "description": "Start page" + }, + "vol": { + "type": "string", + "description": "Volume" + } + }, + "description": "Information about the journal, if this data source is of type Journal." + }, + "languages": { + "description": "The languages present in the data source's content, as defined by OpenDOAR.", + "type": "array", + "items": { + "type": "string" + } + }, + "logourl": { + "type": "string" + }, + "missionstatementurl": { + "type": "string", + "description":"The URL of a mission statement describing the designated community of the data source. As defined by re3data.org" + }, + "officialname": { + "type": "string", + "description": "The official name of the datasource" + }, + "openairecompatibility": { + "type": "string", + "description": "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu." + }, + "originalId": { + "description": "Original identifiers for the datasource" + "type": "array", + "items": { + "type": "string" + } + }, + "pid": { + "description": "Persistent identifiers of the datasource", + "type": "array", + "items": { + "allOf": [ + { + "$ref": "#/definitions/ControlledField" + } + ] + } + }, + "pidsystems": { + "type": "string", + "description": "The persistent identifier system that is used by the data source. As defined by re3data.org" + }, + "policies": { + "description": "Policies of the data source, as defined in OpenDOAR.", + "type": "array", + "items": { + "type": "string" + } + }, + "releaseenddate": { + "type": "string", + "description": "Date when the data source went offline or stopped ingesting new research data. As defined by re3data.org" + }, + "releasestartdate": { + "type": "string", + "description": "Releasing date of the data source, as defined by re3data.org" + }, + "subjects": { + "description": "List of subjects associated to the datasource", + "type": "array", + "items": { + "type": "string" + } + }, + "uploadrights": { + "type": "string", + "description": "Type of data upload. As defined by re3data.org: one of {open, restricted,closed}" + }, + "versioning": { + "type": "boolean", + "description": "As defined by redata.org: 'yes' if the data source supports versioning, 'no' otherwise." + }, + "websiteurl": { + "type": "string" + } + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json new file mode 100644 index 000000000..16afa386d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json @@ -0,0 +1,57 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "alternativenames": { + "description": "Alternative names that identify the organisation", + "type": "array", + "items": { + "type": "string" + } + }, + "country": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "The organisation country code" + }, + "label": { + "type": "string", + "description": "The organisation country label" + } + }, + "description": "The country of the organisation" + }, + "id": { + "type": "string", + "description": "The OpenAIRE id for the organisation" + }, + "legalname": { + "type": "string" + }, + "legalshortname": { + "type": "string" + }, + "pid": { + "description": "Persistent identifiers for the organisation i.e. isni 0000000090326370", + "type": "array", + "items": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "The scheme of the identifier (i.e. isni)" + }, + "value": { + "type": "string", + "description": "the value in the schema (i.e. 0000000090326370)" + } + } + } + }, + "websiteurl": { + "type": "string" + } + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json new file mode 100644 index 000000000..c81187258 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json @@ -0,0 +1,119 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "acronym": { + "type": "string" + }, + "callidentifier": { + "type": "string" + }, + "code": { + "type": "string", + "description": "The grant agreement number" + }, + "enddate": { + "type": "string" + }, + "funding": { + "description": "Funding information for the project", + "type": "array", + "items": { + "type": "object", + "properties": { + "funding_stream": { + "type": "object", + "properties": { + "description": { + "type": "string", + "description": "Description of the funding stream" + }, + "id": { + "type": "string", + "description": "Id of the funding stream" + } + } + }, + "jurisdiction": { + "type": "string", + "description": "The jurisdiction of the funder (i.e. EU)" + }, + "name": { + "type": "string", + "description": "The name of the funder (European Commission)" + }, + "shortName": { + "type": "string", + "description": "The short name of the funder (EC)" + } + } + } + }, + "granted": { + "type": "object", + "properties": { + "currency": { + "type": "string", + "description": "The currency of the granted amount (e.g. EUR)" + }, + "fundedamount": { + "type": "number", + "description": "The funded amount" + }, + "totalcost": { + "type": "number", + "description": "The total cost of the project" + } + }, + "description": "The money granted to the project" + }, + "h2020programme": { + "description": "The h2020 programme funding the project", + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "The code of the programme" + }, + "description": { + "type": "string", + "description": "The description of the programme" + } + } + } + }, + "id": { + "type": "string", + "description": "OpenAIRE id for the project" + }, + "keywords": { + "type": "string" + }, + "openaccessmandatefordataset": { + "type": "boolean" + }, + "openaccessmandateforpublications": { + "type": "boolean" + }, + "startdate": { + "type": "string" + }, + "subject": { + "type": "array", + "items": { + "type": "string" + } + }, + "summary": { + "type": "string" + }, + "title": { + "type": "string" + }, + "websiteurl": { + "type": "string" + } + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json new file mode 100644 index 000000000..98134a03b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json @@ -0,0 +1,68 @@ +{ + "$schema":"http://json-schema.org/draft-07/schema#", + "definitions": { + "Node": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The OpenAIRE id of the entity" + }, + "type": { + "type": "string", + "description": "The type of the entity (i.e. organisation)" + } + } + } + }, + "type":"object", + "properties": { + "provenance": { + "type": "object", + "properties": { + "provenance": { + "type": "string", + "description": "The reason why OpenAIRE holds the relation " + }, + "trust": { + "type": "string", + "description": "The trust of the relation in the range of [0,1]. Where greater the number, more the trust. Harvested relationships have typically a high trust (0.9). The trust of inferred relationship is calculated by the inference algorithm that generated them, as described in https://graph.openaire.eu/about#architecture (Enrichment --> Mining)" + } + } + }, + "reltype": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The semantics of the relation (i.e. isAuthorInstitutionOf). " + }, + "type": { + "type": "string", + "description": "the type of the relation (i.e. affiliation)" + } + }, + "description": "To represent the semantics of a relation between two entities" + }, + "source": { + "allOf": [ + {"$ref": "#/definitions/Node"}, + {"description": "The node source in the relation"} + ] + }, + "target": { + "allOf": [ + {"$ref": "#/definitions/Node"}, + {"description": "The node target in the relation"} + ] + }, + "validated":{ + "type":"boolean", + "description":"True if the relation is related to a project and it has been collected from an authoritative source (i.e. the funder)" + }, + "validationDate":{ + "type":"string", + "description":"The date when the relation was collected from OpenAIRE" + } + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json similarity index 50% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json index cb092110e..a318b7ada 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json @@ -1,9 +1,9 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { - "AccessRight": { - "type": "object", - "properties": { + "AccessRight":{ + "type":"object", + "properties":{ "code": { "type": "string", "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" @@ -12,6 +12,16 @@ "type": "string", "description": "Label for the access mode" }, + "openAccessRoute":{ + "type":"string", + "enum":[ + "gold", + "green", + "hybrid", + "bronze" + ], + "description":"The type of OpenAccess applied to the result" + }, "scheme": { "type": "string", "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" @@ -22,65 +32,40 @@ "type": "object", "properties": { "scheme": { - "type": "string", - "description": "The scheme for the resource" + "type": "string" }, "value": { - "type": "string", - "description": "the value in the scheme" + "type": "string" } - } - }, - "KeyValue": { - "type": "object", - "properties": { - "key": { - "type": "string", - "description": "Description of key" - }, - "value": { - "type": "string", - "description": "Description of value" - } - } + }, + "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" }, "Provenance": { "type": "object", "properties": { "provenance": { "type": "string", - "description": "The provenance of the information" + "description": "The process that produced/provided the information" }, "trust": { - "type": "string", - "description": "The trust associated to the information" + "type": "string" } - } + }, + "description": "Indicates the process that produced (or provided) the information, and the trust associated to the information" } }, "type": "object", "properties": { "author": { - "description": "List of authors of the research results", "type": "array", "items": { "type": "object", "properties": { - "affiliation": { - "description": "Affiliations of the author", - "type": "array", - "items": { - "type": "string", - "description": "One of the affiliation of the author" - } - }, "fullname": { - "type": "string", - "description": "Fullname of the author" + "type": "string" }, "name": { - "type": "string", - "description": "First name of the author" + "type": "string" }, "pid": { "type": "object", @@ -94,44 +79,34 @@ "provenance": { "allOf": [ {"$ref": "#/definitions/Provenance"}, - {"description": "The provenance of the author's pid"} + {"description": "Provenance of author's pid"} ] } - }, - "description": "Persistent identifier of the author (e.g. ORCID)" + } }, "rank": { - "type": "integer", - "description": "Order in which the author appears in the authors list" + "type": "integer" }, "surname": { - "type": "string", - "description": "Surname of the author" + "type": "string" } - }, - "description": "One of the author of the research result" + } } }, - "bestaccessright": { - "allOf": [ - {"$ref": "#/definitions/AccessRight"}, - {"description": "The openest access right associated to the manifestations of this research results"} - ] - }, + "bestaccessright":{ + "allOf":[ + { + "$ref":"#/definitions/AccessRight" + }, + { + "description":"The openest access right associated to the manifestations of this research results" + } + ] + }, "codeRepositoryUrl": { "type": "string", "description": "Only for results with type 'software': the URL to the repository with the source code" }, - "collectedfrom": { - "description": "Information about the sources from which the record has been collected", - "type": "array", - "items": { - "allOf": [ - {"$ref": "#/definitions/KeyValue"}, - {"description": "Key is the OpenAIRE identifier of the data source, value is its name"} - ] - } - }, "contactgroup": { "description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", "type": "array", @@ -150,12 +125,10 @@ "type": "object", "properties": { "conferencedate": { - "type": "string", - "description": "Date of the conference" + "type": "string" }, "conferenceplace": { - "type": "string", - "description": "Place of the conference" + "type": "string" }, "edition": { "type": "string", @@ -170,16 +143,13 @@ "description": "Journal issue" }, "issnLinking": { - "type": "string", - "description": "Journal linking iisn" + "type": "string" }, "issnOnline": { - "type": "string", - "description": "Journal online issn" + "type": "string" }, "issnPrinted": { - "type": "string", - "description": "Journal printed issn" + "type": "string" }, "name": { "type": "string", @@ -187,51 +157,22 @@ }, "sp": { "type": "string", - "description": "Start page" + "description": "start page" }, "vol": { - "type": "string", - "description": "Volume" + "type": "string" } }, "description": "Container has information about the conference or journal where the result has been presented or published" }, - "context": { - "description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu", - "type": "array", - "items": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "Code identifying the RI/RC" - }, - "label": { - "type": "string", - "description": "Label of the RI/RC" - }, - "provenance": { - "description": "Why this result is associated to the RI/RC.", - "type": "array", - "items": { - "allOf": [ - {"$ref": "#/definitions/Provenance"} - - ] - } - } - } - } - }, "contributor": { - "description": "Contributors of this result", "type": "array", "items": { - "type": "string" + "type": "string", + "description": "Contributors for the result" } }, "country": { - "description": "Country associated to this result", "type": "array", "items": { "type": "object", @@ -241,8 +182,7 @@ "description": "ISO 3166-1 alpha-2 country code" }, "label": { - "type": "string", - "description": "English label of the country" + "type": "string" }, "provenance": { "allOf": [ @@ -274,39 +214,13 @@ "type": "array", "items": { "type": "string" - } }, "embargoenddate": { "type": "string", "description": "Date when the embargo ends and this result turns Open Access" }, - "externalReference": { - "description": "Links to external resources like entries from thematic databases (e.g. Protein Data Bank)", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this result is linked to the external resource"} - ] - }, - "typology": { - "type": "string" - }, - "value": { - "type": "string" - } - } - } - }, "format": { - "type": "array", "items": { "type": "string" @@ -332,56 +246,87 @@ }, "id": { "type": "string", - "description": "OpenAIRE identifier" + "description": "OpenAIRE Identifier" }, - "instance": { - "description": "Manifestations (i.e. different versions) of the result. For example: the pre-print and the published versions are two manifestations of the same research result", - "type": "array", - "items": { - "type": "object", - "properties": { - "accessright": { - "allOf": [ - {"$ref": "#/definitions/AccessRight"}, - {"description": "Access right of this instance"} + "instance":{ + "description":"Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version", + "type":"array", + "items":{ + "type":"object", + "properties":{ + "accessright":{ + "allOf":[ + { + "$ref":"#/definitions/AccessRight" + }, + { + "description":"The accessright of this instance. One result may have multiple instances, each with a different accessright" + } ] }, - "collectedfrom": { - "allOf": [ - {"$ref": "#/definitions/KeyValue"}, - {"description": "Information about the source from which the instance has been collected. Key is the OpenAIRE identifier of the data source, value is its name"} - ] + "alternateIdentifier":{ + "description":"All the identifiers associated to the result other than the authoritative ones", + "type":"array", + "items":{ + "allOf":[ + { + "$ref":"#/definitions/ControlledField" + }, + { + "description":"Description of alternateIdentifier" + } + ] + } + }, + "articleprocessingcharge":{ + "description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative.", + "type":"object", + "properties":{ + "amount":{ + "type":"string" + }, + "currency":{ + "type":"string" + } + } }, - "hostedby": { - "allOf": [ - {"$ref": "#/definitions/KeyValue"}, - {"description": "Information about the source from which the instance can be viewed or downloaded. Key is the OpenAIRE identifier of the data source, value is its name"} - ] + "license":{ + "type":"string" }, - "license": { - "type": "string", - "description": "License applied to the instance" + "pid":{ + "description":"The set of persistent identifiers associated to this instance that have been collected from an authority for the pid type (i.e. Crossref/Datacite for doi)", + "type":"array", + "items":{ + "allOf":[ + { + "$ref":"#/definitions/ControlledField" + }, + { + "description":"The persistent identifier associated to the result" + } + ] + } + }, + "publicationdate":{ + "type":"string" }, - "publicationdate": { - "type": "string", - "description": "Publication date of the instance" + "refereed":{ + "description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)", + "type":"string" }, - "refereed": { - "type": "string", - "description": "Was the instance subject to peer-review? Possible values are 'Unknown', 'nonPeerReviewed', 'peerReviewed' (see also https://api.openaire.eu/vocabularies/dnet:review_levels)" + "type":{ + "type":"string", + "description":"The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" }, - "type": { - "type": "string", - "description": "Type of the instance. Possible values are listed at https://api.openaire.eu/vocabularies/dnet:publication_resource" - }, - "url": { - "description":"Location where the instance is accessible", - "type": "array", - "items": { - "type": "string" + "url":{ + "description":"URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ", + "type":"array", + "items":{ + "type":"string" } } } + } }, "language": { @@ -402,8 +347,7 @@ "description": "Timestamp of last update of the record in OpenAIRE" }, "maintitle": { - "type": "string", - "description": "Title" + "type": "string" }, "originalId": { "description": "Identifiers of the record at the original sources", @@ -418,74 +362,20 @@ "items": { "allOf": [ {"$ref": "#/definitions/ControlledField"}, - {"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result "} + {"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result. Note: the result will have a pid associated only if it was collected from an authority for that pid type. For example a doi will be among the pids for one result if the result metadata were collected from Crossref or Datacite. In all the other cases, the doi will be present among the alteranteIdentifiers for the result "} ] } }, + "programmingLanguage": { "type": "string", "description": "Only for results with type 'software': the programming language" }, - "projects": { - "description": "List of projects (i.e. grants) that (co-)funded the production ofn the research results", - "type": "array", - "items": { - "type": "object", - "properties": { - "acronym": { - "type": "string", - "description": "Project acronym" - }, - "code": { - "type": "string", - "description": "Grant code" - }, - "funder": { - "type": "object", - "properties": { - "fundingStream": { - "type": "string", - "description": "Stream of funding (e.g. for European Commission can be H2020 or FP7)" - }, - "jurisdiction": { - "type": "string", - "description": "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)" - }, - "name": { - "type": "string", - "description": "Name of the funder" - }, - "shortName": { - "type": "string", - "description": "Short name or acronym of the funder" - } - }, - "description": "Information about the funder funding the project" - }, - "id": { - "type": "string", - "description": "OpenAIRE identifier of the project" - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this project is associated to the result"} - ] - }, - "title": { - "type": "string", - "description": "Title of the project" - } - } - } - }, "publicationdate": { - "type": "string", - "description": "Date of publication" + "type": "string" }, "publisher": { - "type": "string", - "description": "Publisher" + "type": "string" }, "size": { "type": "string", @@ -513,15 +403,14 @@ "subject": { "allOf": [ {"$ref": "#/definitions/ControlledField"}, - {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary). "} + {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."} ] } } } }, "subtitle": { - "type": "string", - "description": "Sub-title of the result" + "type": "string" }, "tool": { "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", From a22c29fba185a799c37e46ffe09a126b0b97eba2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 5 Nov 2021 12:08:33 +0100 Subject: [PATCH 007/176] [Graph DUMP] Filtering out from the originalIds the id of the result in OpenAIRE --- .../main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 9e2212f4c..8e38a5515 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -133,7 +133,7 @@ public class ResultMapper implements Serializable { .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); out.setFormat(formatList); out.setId(input.getId()); - out.setOriginalId(input.getOriginalId()); + out.setOriginalId(input.getOriginalId().stream().filter(s -> !s.startsWith("50|")).collect(Collectors.toList())); Optional> oInst = Optional .ofNullable(input.getInstance()); From 5681e89544a676f8b4446a51510001e6252f6fff Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 5 Nov 2021 12:18:24 +0100 Subject: [PATCH 008/176] Update 'dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json' --- .../oa/graph/dump/schemas/result_schema.json | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json index a318b7ada..d4f05e5ae 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json @@ -308,7 +308,8 @@ } }, "publicationdate":{ - "type":"string" + "type":"string", + "description": "Date of the research product" }, "refereed":{ "description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)", @@ -326,7 +327,6 @@ } } } - } }, "language": { @@ -338,7 +338,7 @@ }, "label": { "type": "string", - "description": "English label" + "description": "Language label in English" } } }, @@ -347,7 +347,12 @@ "description": "Timestamp of last update of the record in OpenAIRE" }, "maintitle": { - "type": "string" + "type": "string", + "descriptio": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software." + }, + "subtitle": { + "type": "string", + "descriptio": "Explanatory or alternative name by which a scientific result is known." }, "originalId": { "description": "Identifiers of the record at the original sources", @@ -366,16 +371,17 @@ ] } }, - "programmingLanguage": { "type": "string", "description": "Only for results with type 'software': the programming language" }, "publicationdate": { - "type": "string" + "type": "string", + "description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it’s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date." }, "publisher": { - "type": "string" + "type": "string", + "description": "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource." }, "size": { "type": "string", @@ -409,9 +415,6 @@ } } }, - "subtitle": { - "type": "string" - }, "tool": { "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", "type": "array", From 8442efd8d1f7c2d76ce0fd4bcef57a3752d04fa3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 5 Nov 2021 12:29:22 +0100 Subject: [PATCH 009/176] [Graph DUMP] Filtering out from the originalIds the id of the result in OpenAIRE --- .../main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 8e38a5515..617700700 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -133,7 +133,9 @@ public class ResultMapper implements Serializable { .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); out.setFormat(formatList); out.setId(input.getId()); - out.setOriginalId(input.getOriginalId().stream().filter(s -> !s.startsWith("50|")).collect(Collectors.toList())); + out + .setOriginalId( + input.getOriginalId().stream().filter(s -> !s.startsWith("50|")).collect(Collectors.toList())); Optional> oInst = Optional .ofNullable(input.getInstance()); From 94918a673cf5498ce2dc5709cfc1dbf96e44d80a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 8 Nov 2021 10:25:28 +0100 Subject: [PATCH 010/176] [Graph DUMP] Fix issue for empty origilaId list --- .../dhp/oa/graph/dump/ResultMapper.java | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 617700700..b6ac663b8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -133,9 +133,21 @@ public class ResultMapper implements Serializable { .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); out.setFormat(formatList); out.setId(input.getId()); - out - .setOriginalId( - input.getOriginalId().stream().filter(s -> !s.startsWith("50|")).collect(Collectors.toList())); + out.setOriginalId(new ArrayList<>()); + Optional + .ofNullable(input.getOriginalId()) + .ifPresent( + v -> out + .setOriginalId( + input + .getOriginalId() + .stream() + .filter(s -> !s.startsWith("50|")) + .collect(Collectors.toList()))); +// Optional +// .ofNullable( +// input.getOriginalId().stream().filter(s -> !s.startsWith("50|")).collect(Collectors.toList())) +// .ifPresent(v -> out.setOriginalId(v)); Optional> oInst = Optional .ofNullable(input.getInstance()); From 8cc50ecee05d311fe8b3adc02caf9a2b6237bd25 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 11 Nov 2021 08:59:20 +0100 Subject: [PATCH 011/176] [Graph Dump] changed AccessRight with BestAccessRight in the dump and modified the dependency to the schema to the SNAPSHOT --- .../main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java | 3 ++- pom.xml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index b6ac663b8..78f0b6796 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -65,7 +65,8 @@ public class ResultMapper implements Serializable { String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); out .setBestaccessright( - AccessRight + + BestAccessRight .newInstance( code, Constants.coarCodeLabelMap.get(code), diff --git a/pom.xml b/pom.xml index 02bc5d8d4..53b5938e0 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.8.21] + [2.8.22-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From e5498052e801d663a95d23c4172c50d812bb18f0 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 11 Nov 2021 16:14:10 +0100 Subject: [PATCH 012/176] [Graph DUmp] removed OpenAccessRoute from test in best access right --- .../src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index bf6301ec4..6d2bced94 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -331,7 +331,7 @@ public class DumpJobTest { Assertions .assertEquals( Constants.accessRightsCoarMap.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode()); - Assertions.assertEquals(null, gr.getBestaccessright().getOpenAccessRoute()); + Assertions.assertEquals("One Ecosystem", gr.getContainer().getName()); Assertions.assertEquals("2367-8194", gr.getContainer().getIssnOnline()); From b8bdabfae95c017d9c4ab84e1c1778699c2f8127 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 11 Nov 2021 16:16:48 +0100 Subject: [PATCH 013/176] [Graph DUmp] removed OpenAccessRoute from test in best access right --- .../src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 6d2bced94..602aaf6e6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -331,7 +331,6 @@ public class DumpJobTest { Assertions .assertEquals( Constants.accessRightsCoarMap.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode()); - Assertions.assertEquals("One Ecosystem", gr.getContainer().getName()); Assertions.assertEquals("2367-8194", gr.getContainer().getIssnOnline()); From 6595135a1a870f67b53aa0a8baa15fbe2a984a3c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 12 Nov 2021 11:45:38 +0100 Subject: [PATCH 014/176] [Dump Schemas] changed the schema of the dumped result according to the modifications in the bestAccessRight type --- .../oa/graph/dump/schemas/result_schema.json | 95 ++++++++----------- .../dhp/oa/graph/dump/GenerateJsonSchema.java | 4 +- 2 files changed, 41 insertions(+), 58 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json index d4f05e5ae..a1e09525e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json @@ -1,34 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { - "AccessRight":{ - "type":"object", - "properties":{ - "code": { - "type": "string", - "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label": { - "type": "string", - "description": "Label for the access mode" - }, - "openAccessRoute":{ - "type":"string", - "enum":[ - "gold", - "green", - "hybrid", - "bronze" - ], - "description":"The type of OpenAccess applied to the result" - }, - "scheme": { - "type": "string", - "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - } - }, - "ControlledField": { + "ControlledField": { "type": "object", "properties": { "scheme": { @@ -94,14 +67,21 @@ } }, "bestaccessright":{ - "allOf":[ - { - "$ref":"#/definitions/AccessRight" - }, - { - "description":"The openest access right associated to the manifestations of this research results" - } - ] + "type":"object", + "properties":{ + "code": { + "type": "string", + "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" + }, + "label": { + "type": "string", + "description": "Label for the access mode" + }, + "scheme": { + "type": "string", + "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" + } + } }, "codeRepositoryUrl": { "type": "string", @@ -255,29 +235,32 @@ "type":"object", "properties":{ "accessright":{ - "allOf":[ - { - "$ref":"#/definitions/AccessRight" + "type":"object", + "properties":{ + "code": { + "type": "string", + "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" }, - { - "description":"The accessright of this instance. One result may have multiple instances, each with a different accessright" + "label": { + "type": "string", + "description": "Label for the access mode" + }, + "openAccessRoute":{ + "type":"string", + "enum":[ + "gold", + "green", + "hybrid", + "bronze" + ], + "description":"The type of OpenAccess applied to the result" + }, + "scheme": { + "type": "string", + "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" } - ] + } }, - "alternateIdentifier":{ - "description":"All the identifiers associated to the result other than the authoritative ones", - "type":"array", - "items":{ - "allOf":[ - { - "$ref":"#/definitions/ControlledField" - }, - { - "description":"Description of alternateIdentifier" - } - ] - } - }, "articleprocessingcharge":{ "description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative.", "type":"object", diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java index 697ec705f..2cc53027e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java @@ -9,7 +9,7 @@ import com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.schema.dump.oaf.graph.*; -@Disabled +//@Disabled class GenerateJsonSchema { @Test @@ -21,7 +21,7 @@ class GenerateJsonSchema { configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName()); SchemaGeneratorConfig config = configBuilder.build(); SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(Relation.class); + JsonNode jsonSchema = generator.generateSchema(GraphResult.class); System.out.println(jsonSchema.toString()); } From 99d86134f518627a45c87bf02516ff9c3f8d76ce Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 16 Nov 2021 12:04:21 +0100 Subject: [PATCH 015/176] [Graph Dump] changed the dump since the measures have been moded at the level of the instance --- .../dhp/oa/graph/dump/ResultMapper.java | 20 ++++++++++--------- .../dhp/oa/graph/dump/DumpJobTest.java | 15 ++++++++++---- .../resultDump/publication_extendedinstance | 2 +- pom.xml | 2 +- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 78f0b6796..3a29a2bbf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -43,15 +43,6 @@ public class ResultMapper implements Serializable { try { addTypeSpecificInformation(out, input, ort); -// Optional> mes = Optional.ofNullable(input.getMeasures()); -// if (mes.isPresent()) { -// List measure = new ArrayList<>(); -// mes -// .get() -// .forEach( -// m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); -// out.setMeasures(measure); -// } Optional .ofNullable(input.getAuthor()) @@ -460,6 +451,17 @@ public class ResultMapper implements Serializable { code, Constants.coarCodeLabelMap.get(code), Constants.COAR_ACCESS_RIGHT_SCHEMA)); + + Optional> mes = Optional.ofNullable(i.getMeasures()); + if (mes.isPresent()){ + List measure = new ArrayList<>(); + mes + .get() + .forEach( + m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); + instance.setMeasures(measure ); + } + if (opAr.get().getOpenAccessRoute() != null) { switch (opAr.get().getOpenAccessRoute()) { case hybrid: diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 602aaf6e6..89b12a712 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -171,10 +171,14 @@ public class DumpJobTest { GraphResult gr = verificationDataset.first(); - Assertions.assertEquals(2, gr.getMeasures().size()); + Assertions.assertEquals(1, gr.getInstance().size()); + + Assertions.assertEquals(2, gr.getInstance().get(0).getMeasures().size()); Assertions .assertTrue( gr + .getInstance() + .get(0) .getMeasures() .stream() .anyMatch( @@ -183,6 +187,8 @@ public class DumpJobTest { Assertions .assertTrue( gr + .getInstance() + .get(0) .getMeasures() .stream() .anyMatch( @@ -357,11 +363,12 @@ public class DumpJobTest { Assertions.assertEquals("50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2", gr.getId()); - Assertions.assertEquals(2, gr.getOriginalId().size()); + System.out.println(gr.getOriginalId().size()); + + Assertions.assertEquals(1, gr.getOriginalId().size()); Assertions .assertTrue( - gr.getOriginalId().contains("50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2") - && gr.getOriginalId().contains("10.3897/oneeco.2.e13718")); + gr.getOriginalId().contains("10.3897/oneeco.2.e13718")); Assertions.assertEquals(1, gr.getPid().size()); Assertions diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance index 043be7b1a..468811513 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance @@ -1 +1 @@ -{"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1628257970612,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","dateoftransformation":"2020-03-23T00:26:59.078Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-23T00:20:51.392Z","altered":true,"baseURL":"http%3A%2F%2Fzookeys.pensoft.net%2Foai.php","identifier":"10.3897/oneeco.2.e13718","datestamp":"2017-09-08","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":[{"id": "influence", "unit": [{"value": "1.62759106106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.22519296", "key": "score"}]}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":[],"affiliation":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[{"dataInfo":null, "classid":"IT", "classname":"Italy","schemeid":"fake","schemename":"fake"}],"subject":[{"value":"Ecosystem Services hotspots","qualifier":{"classid":"ACM","classname":"ACM Computing Classification System","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Natura 2000","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Quiet Protected Areas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Biodiversity","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Agriculture","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Elevation","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Slope","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ecosystem Service trade-offs and synergies","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":" cultural services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"provisioning services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"regulating services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"supporting services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ecosystem Service capacity is higher in areas of multiple designation types","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Pensoft Publishers","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[{"value":"One Ecosystem 2: e13718","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[{"value":"text/html","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"}],"id":"dh-ch"}],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":"green"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem","dataInfo":null},"url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null},"pid":[],"alternateIdentifier":[{"value":"10.3897/oneeco.2.e13718","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}} \ No newline at end of file +{"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1628257970612,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","dateoftransformation":"2020-03-23T00:26:59.078Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-23T00:20:51.392Z","altered":true,"baseURL":"http%3A%2F%2Fzookeys.pensoft.net%2Foai.php","identifier":"10.3897/oneeco.2.e13718","datestamp":"2017-09-08","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":[],"affiliation":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[{"dataInfo":null, "classid":"IT", "classname":"Italy","schemeid":"fake","schemename":"fake"}],"subject":[{"value":"Ecosystem Services hotspots","qualifier":{"classid":"ACM","classname":"ACM Computing Classification System","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Natura 2000","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Quiet Protected Areas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Biodiversity","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Agriculture","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Elevation","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Slope","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ecosystem Service trade-offs and synergies","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":" cultural services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"provisioning services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"regulating services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"supporting services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ecosystem Service capacity is higher in areas of multiple designation types","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Pensoft Publishers","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[{"value":"One Ecosystem 2: e13718","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[{"value":"text/html","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"}],"id":"dh-ch"}],"externalReference":[],"instance":[{"measures":[{"id": "influence", "unit": [{"value": "1.62759106106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.22519296", "key": "score"}]}],"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":"green"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem","dataInfo":null},"url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null},"pid":[],"alternateIdentifier":[{"value":"10.3897/oneeco.2.e13718","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 274192c67..616d10c18 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.8.22] + [2.9.23] [4.0.3] [6.0.5] [3.1.6] From c7c0c3187b73c7ac9cbf3b26e9b70f9dc77281f9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 16 Nov 2021 13:56:32 +0100 Subject: [PATCH 016/176] [AFFILIATION PROPAGATION] Applied some SonarLint suggestions --- .../PrepareInfo.java | 20 ++++++------------- .../SparkResultToOrganizationFromSemRel.java | 7 ++++--- .../StepActions.java | 10 ++++------ 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java index 732ba2c2d..d853f3858 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java @@ -30,25 +30,17 @@ import scala.Tuple2; public class PrepareInfo implements Serializable { - // leggo le relazioni e seleziono quelle fra result ed organizzazioni - // raggruppo per result e salvo - // r => {o1, o2, o3} - - // leggo le relazioni fra le organizzazioni e creo la gerarchia delle parentele: - // hashMap key organizzazione -> value tutti i suoi padri - // o => {p1, p2} - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final Logger log = LoggerFactory.getLogger(PrepareInfo.class); // associate orgs with all their parent - private static final String relOrgQuery = "SELECT target key, collect_set(source) as valueSet " + + private static final String ORGANIZATION_ORGANIZATION_QUERY = "SELECT target key, collect_set(source) as valueSet " + "FROM relation " + "WHERE lower(relclass) = '" + ModelConstants.IS_PARENT_OF.toLowerCase() + "' and datainfo.deletedbyinference = false " + "GROUP BY target"; - private static final String relResQuery = "SELECT source key, collect_set(target) as valueSet " + + //associates results with all the orgs they are affiliated to + private static final String RESULT_ORGANIZATION_QUERY = "SELECT source key, collect_set(target) as valueSet " + "FROM relation " + "WHERE lower(relclass) = '" + ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase() + "' and datainfo.deletedbyinference = false " + @@ -101,7 +93,7 @@ public class PrepareInfo implements Serializable { relation.createOrReplaceTempView("relation"); spark - .sql(relOrgQuery) + .sql(ORGANIZATION_ORGANIZATION_QUERY) .as(Encoders.bean(KeyValueSet.class)) .write() .mode(SaveMode.Overwrite) @@ -109,7 +101,7 @@ public class PrepareInfo implements Serializable { .json(childParentOrganizationPath); spark - .sql(relResQuery) + .sql(RESULT_ORGANIZATION_QUERY) .as(Encoders.bean(KeyValueSet.class)) .write() .mode(SaveMode.Overwrite) @@ -130,7 +122,7 @@ public class PrepareInfo implements Serializable { "' and datainfo.deletedbyinference = false") .as(Encoders.STRING()); - // prendo dalla join i risultati che hanno solo il lato sinistro: sono foglie + // takes from the join the entities having only the left hand side: the leaves. Saves them children .joinWith(parent, children.col("child").equalTo(parent.col("parent")), "left") .map((MapFunction, String>) value -> { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java index a900fe0d8..f5d8361d7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java @@ -32,6 +32,7 @@ import eu.dnetlib.dhp.schema.oaf.Relation; public class SparkResultToOrganizationFromSemRel implements Serializable { private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromSemRel.class); private static final int MAX_ITERATION = 5; + public static final String NEW_RELATION_PATH = "/newRelation"; public static void main(String[] args) throws Exception { @@ -120,11 +121,11 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { iteration++; StepActions .execStep( - spark, graphPath, workingPath + "/newRelation", + spark, graphPath, workingPath + NEW_RELATION_PATH, leavesPath, childParentPath, resultOrganizationPath); StepActions .prepareForNextStep( - spark, workingPath + "/newRelation", resultOrganizationPath, leavesPath, + spark, workingPath + NEW_RELATION_PATH, resultOrganizationPath, leavesPath, childParentPath, workingPath + "/leaves", workingPath + "/resOrg"); moveOutput(spark, workingPath, leavesPath, resultOrganizationPath); leavesCount = readPath(spark, leavesPath, Leaves.class).count(); @@ -154,7 +155,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { propagationCounter.getNotReachedFirstParent().add(1); } - addNewRelations(spark, workingPath + "/newRelation", outputPath); + addNewRelations(spark, workingPath + NEW_RELATION_PATH, outputPath); } private static void moveOutput(SparkSession spark, String workingPath, String leavesPath, diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java index affe1f56f..d190e5342 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java @@ -27,7 +27,6 @@ import scala.Tuple2; public class StepActions implements Serializable { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final Logger log = LoggerFactory.getLogger(StepActions.class); public static void execStep(SparkSession spark, String graphPath, String newRelationPath, @@ -185,10 +184,9 @@ public class StepActions implements Serializable { "GROUP BY resId") .as(Encoders.bean(KeyValueSet.class)); - // resultParent.foreach((ForeachFunction)kv -> - // System.out.println(OBJECT_MAPPER.writeValueAsString(kv))); + // create new relations from result to organization for each result linked to a leaf - Dataset tmp = resultParent + return resultParent .flatMap( (FlatMapFunction) v -> v .getValueSet() @@ -206,8 +204,8 @@ public class StepActions implements Serializable { .collect(Collectors.toList()) .iterator(), Encoders.bean(Relation.class)); - tmp.foreach((ForeachFunction) r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); - return tmp; + + } } From 7c96e3fd461db7a7f04ccf2d04a6534f3fd82348 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 16 Nov 2021 13:57:26 +0100 Subject: [PATCH 017/176] removed not useful dir --- .../dhp-enrichment/null/leaves/._SUCCESS.crc | Bin 8 -> 0 bytes ...9-ad38-4761-a051-bb601aecc3f0-c000.json.gz.crc | Bin 12 -> 0 bytes dhp-workflows/dhp-enrichment/null/leaves/_SUCCESS | 0 ...e33b9-ad38-4761-a051-bb601aecc3f0-c000.json.gz | Bin 20 -> 0 bytes .../dhp-enrichment/null/newRelation/._SUCCESS.crc | Bin 8 -> 0 bytes ...e-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc | Bin 12 -> 0 bytes ...3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc | Bin 12 -> 0 bytes ...4-8e11-4513-a719-beee952de6ee-c000.json.gz.crc | Bin 12 -> 0 bytes ...9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-49b1-48ae-951b-6febe9db8857-c000.json.gz.crc | Bin 12 -> 0 bytes ...c-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc | Bin 12 -> 0 bytes ...f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz.crc | Bin 12 -> 0 bytes ...4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-e550-47d3-90a8-5854c6c07c5e-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc | Bin 12 -> 0 bytes ...7-c748-4164-aba1-1e45fbadda99-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc | Bin 12 -> 0 bytes ...5-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc | Bin 12 -> 0 bytes ...6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc | Bin 12 -> 0 bytes ...7-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz.crc | Bin 12 -> 0 bytes ...3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-6383-4d37-97e4-124125abfca5-c000.json.gz.crc | Bin 12 -> 0 bytes ...b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc | Bin 12 -> 0 bytes ...8-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-16d5-4381-a03b-9f2ed0830707-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-f830-4638-984b-becff6aa4587-c000.json.gz.crc | Bin 12 -> 0 bytes ...0-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc | Bin 12 -> 0 bytes ...c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc | Bin 12 -> 0 bytes ...0-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-3a44-41e4-963b-5f602891de01-c000.json.gz.crc | Bin 12 -> 0 bytes ...b-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc | Bin 12 -> 0 bytes ...9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc | Bin 12 -> 0 bytes ...c-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc | Bin 12 -> 0 bytes ...4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc | Bin 12 -> 0 bytes ...5-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc | Bin 12 -> 0 bytes ...3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc | Bin 12 -> 0 bytes ...8-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-f830-4638-984b-becff6aa4587-c000.json.gz.crc | Bin 12 -> 0 bytes ...0-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-3a44-41e4-963b-5f602891de01-c000.json.gz.crc | Bin 12 -> 0 bytes ...b-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc | Bin 12 -> 0 bytes ...3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc | Bin 12 -> 0 bytes ...6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc | Bin 12 -> 0 bytes ...b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc | Bin 12 -> 0 bytes ...c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc | Bin 12 -> 0 bytes ...0-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc | Bin 12 -> 0 bytes ...3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc | Bin 12 -> 0 bytes ...6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc | Bin 12 -> 0 bytes ...b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc | Bin 12 -> 0 bytes ...c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc | Bin 12 -> 0 bytes ...0-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc | Bin 12 -> 0 bytes ...3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc | Bin 12 -> 0 bytes ...9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc | Bin 12 -> 0 bytes ...c-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc | Bin 12 -> 0 bytes ...6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc | Bin 12 -> 0 bytes ...b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc | Bin 12 -> 0 bytes ...8-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-f830-4638-984b-becff6aa4587-c000.json.gz.crc | Bin 12 -> 0 bytes ...c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc | Bin 12 -> 0 bytes ...0-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc | Bin 12 -> 0 bytes ...b-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc | Bin 12 -> 0 bytes ...9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc | Bin 12 -> 0 bytes ...c-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc | Bin 12 -> 0 bytes ...4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc | Bin 12 -> 0 bytes ...5-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc | Bin 12 -> 0 bytes ...3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc | Bin 12 -> 0 bytes ...8-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-f830-4638-984b-becff6aa4587-c000.json.gz.crc | Bin 12 -> 0 bytes ...0-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-3a44-41e4-963b-5f602891de01-c000.json.gz.crc | Bin 12 -> 0 bytes ...b-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc | Bin 12 -> 0 bytes ...3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc | Bin 12 -> 0 bytes ...9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc | Bin 12 -> 0 bytes ...c-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc | Bin 12 -> 0 bytes ...6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc | Bin 12 -> 0 bytes ...b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc | Bin 12 -> 0 bytes ...8-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-f830-4638-984b-becff6aa4587-c000.json.gz.crc | Bin 12 -> 0 bytes ...c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc | Bin 12 -> 0 bytes ...0-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc | Bin 12 -> 0 bytes ...b-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc | Bin 12 -> 0 bytes ...9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc | Bin 12 -> 0 bytes ...c-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc | Bin 12 -> 0 bytes ...4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc | Bin 12 -> 0 bytes ...5-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc | Bin 12 -> 0 bytes ...3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc | Bin 12 -> 0 bytes ...8-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-f830-4638-984b-becff6aa4587-c000.json.gz.crc | Bin 12 -> 0 bytes ...0-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-3a44-41e4-963b-5f602891de01-c000.json.gz.crc | Bin 12 -> 0 bytes ...b-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc | Bin 12 -> 0 bytes ...e-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc | Bin 12 -> 0 bytes ...9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc | Bin 12 -> 0 bytes ...c-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc | Bin 12 -> 0 bytes ...8-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc | Bin 12 -> 0 bytes ...2-f830-4638-984b-becff6aa4587-c000.json.gz.crc | Bin 12 -> 0 bytes ...b-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc | Bin 12 -> 0 bytes ...a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc | Bin 12 -> 0 bytes .../dhp-enrichment/null/newRelation/_SUCCESS | 0 ...d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz | Bin 20 -> 0 bytes ...487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz | Bin 20 -> 0 bytes ...52c24-8e11-4513-a719-beee952de6ee-c000.json.gz | Bin 20 -> 0 bytes ...192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz | Bin 20 -> 0 bytes ...8bfd2-49b1-48ae-951b-6febe9db8857-c000.json.gz | Bin 20 -> 0 bytes ...fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz | Bin 20 -> 0 bytes ...1fa1f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz | Bin 20 -> 0 bytes ...125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz | Bin 20 -> 0 bytes ...369da-e550-47d3-90a8-5854c6c07c5e-c000.json.gz | Bin 20 -> 0 bytes ...a0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz | Bin 20 -> 0 bytes ...2f647-c748-4164-aba1-1e45fbadda99-c000.json.gz | Bin 20 -> 0 bytes ...5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz | Bin 20 -> 0 bytes ...63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz | Bin 20 -> 0 bytes ...97392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz | Bin 20 -> 0 bytes ...24405-f58b-4774-8782-b9e7c25f989f-c000.json.gz | Bin 20 -> 0 bytes ...06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz | Bin 20 -> 0 bytes ...8cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz | Bin 20 -> 0 bytes ...d7e27-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz | Bin 20 -> 0 bytes ...dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz | Bin 20 -> 0 bytes ...4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz | Bin 20 -> 0 bytes ...7934e-6383-4d37-97e4-124125abfca5-c000.json.gz | Bin 20 -> 0 bytes ...a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz | Bin 20 -> 0 bytes ...e3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz | Bin 20 -> 0 bytes ...4081a-16d5-4381-a03b-9f2ed0830707-c000.json.gz | Bin 20 -> 0 bytes ...70332-a058-44b8-962c-4350e0e108a7-c000.json.gz | Bin 20 -> 0 bytes ...8af72-f830-4638-984b-becff6aa4587-c000.json.gz | Bin 20 -> 0 bytes ...5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz | Bin 20 -> 0 bytes ...5d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz | Bin 20 -> 0 bytes ...4cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz | Bin 20 -> 0 bytes ...e3882-3a44-41e4-963b-5f602891de01-c000.json.gz | Bin 20 -> 0 bytes ...2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz | Bin 20 -> 0 bytes ...9bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz | Bin 20 -> 0 bytes ...d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz | Bin 356 -> 0 bytes ...192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz | Bin 356 -> 0 bytes ...fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz | Bin 356 -> 0 bytes ...125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz | Bin 356 -> 0 bytes ...5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz | Bin 356 -> 0 bytes ...63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz | Bin 356 -> 0 bytes ...97392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz | Bin 356 -> 0 bytes ...24405-f58b-4774-8782-b9e7c25f989f-c000.json.gz | Bin 356 -> 0 bytes ...dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz | Bin 356 -> 0 bytes ...4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz | Bin 356 -> 0 bytes ...e3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz | Bin 356 -> 0 bytes ...8af72-f830-4638-984b-becff6aa4587-c000.json.gz | Bin 356 -> 0 bytes ...5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz | Bin 356 -> 0 bytes ...e3882-3a44-41e4-963b-5f602891de01-c000.json.gz | Bin 356 -> 0 bytes ...2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz | Bin 356 -> 0 bytes ...9bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz | Bin 356 -> 0 bytes ...487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz | Bin 323 -> 0 bytes ...a0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz | Bin 323 -> 0 bytes ...06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz | Bin 323 -> 0 bytes ...8cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz | Bin 323 -> 0 bytes ...a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz | Bin 323 -> 0 bytes ...70332-a058-44b8-962c-4350e0e108a7-c000.json.gz | Bin 323 -> 0 bytes ...5d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz | Bin 323 -> 0 bytes ...4cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz | Bin 323 -> 0 bytes ...487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz | Bin 326 -> 0 bytes ...a0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz | Bin 326 -> 0 bytes ...06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz | Bin 326 -> 0 bytes ...8cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz | Bin 326 -> 0 bytes ...a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz | Bin 326 -> 0 bytes ...70332-a058-44b8-962c-4350e0e108a7-c000.json.gz | Bin 326 -> 0 bytes ...5d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz | Bin 326 -> 0 bytes ...4cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz | Bin 326 -> 0 bytes ...d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz | Bin 357 -> 0 bytes ...487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz | Bin 357 -> 0 bytes ...192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz | Bin 357 -> 0 bytes ...fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz | Bin 357 -> 0 bytes ...a0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz | Bin 357 -> 0 bytes ...97392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz | Bin 357 -> 0 bytes ...06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz | Bin 357 -> 0 bytes ...8cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz | Bin 357 -> 0 bytes ...a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz | Bin 357 -> 0 bytes ...e3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz | Bin 357 -> 0 bytes ...70332-a058-44b8-962c-4350e0e108a7-c000.json.gz | Bin 357 -> 0 bytes ...8af72-f830-4638-984b-becff6aa4587-c000.json.gz | Bin 357 -> 0 bytes ...5d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz | Bin 357 -> 0 bytes ...4cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz | Bin 357 -> 0 bytes ...2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz | Bin 357 -> 0 bytes ...9bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz | Bin 357 -> 0 bytes ...d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz | Bin 398 -> 0 bytes ...192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz | Bin 398 -> 0 bytes ...fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz | Bin 398 -> 0 bytes ...125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz | Bin 324 -> 0 bytes ...5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz | Bin 324 -> 0 bytes ...63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz | Bin 324 -> 0 bytes ...97392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz | Bin 398 -> 0 bytes ...24405-f58b-4774-8782-b9e7c25f989f-c000.json.gz | Bin 324 -> 0 bytes ...dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz | Bin 324 -> 0 bytes ...4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz | Bin 324 -> 0 bytes ...e3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz | Bin 398 -> 0 bytes ...8af72-f830-4638-984b-becff6aa4587-c000.json.gz | Bin 398 -> 0 bytes ...5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz | Bin 324 -> 0 bytes ...e3882-3a44-41e4-963b-5f602891de01-c000.json.gz | Bin 324 -> 0 bytes ...2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz | Bin 398 -> 0 bytes ...9bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz | Bin 398 -> 0 bytes ...d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz | Bin 323 -> 0 bytes ...487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz | Bin 323 -> 0 bytes ...192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz | Bin 323 -> 0 bytes ...fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz | Bin 323 -> 0 bytes ...a0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz | Bin 323 -> 0 bytes ...97392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz | Bin 323 -> 0 bytes ...06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz | Bin 323 -> 0 bytes ...8cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz | Bin 323 -> 0 bytes ...a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz | Bin 323 -> 0 bytes ...e3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz | Bin 323 -> 0 bytes ...70332-a058-44b8-962c-4350e0e108a7-c000.json.gz | Bin 323 -> 0 bytes ...8af72-f830-4638-984b-becff6aa4587-c000.json.gz | Bin 323 -> 0 bytes ...5d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz | Bin 323 -> 0 bytes ...4cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz | Bin 323 -> 0 bytes ...2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz | Bin 323 -> 0 bytes ...9bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz | Bin 323 -> 0 bytes ...d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz | Bin 348 -> 0 bytes ...192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz | Bin 348 -> 0 bytes ...fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz | Bin 348 -> 0 bytes ...125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz | Bin 348 -> 0 bytes ...5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz | Bin 348 -> 0 bytes ...63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz | Bin 348 -> 0 bytes ...97392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz | Bin 348 -> 0 bytes ...24405-f58b-4774-8782-b9e7c25f989f-c000.json.gz | Bin 348 -> 0 bytes ...dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz | Bin 348 -> 0 bytes ...4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz | Bin 348 -> 0 bytes ...e3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz | Bin 348 -> 0 bytes ...8af72-f830-4638-984b-becff6aa4587-c000.json.gz | Bin 348 -> 0 bytes ...5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz | Bin 348 -> 0 bytes ...e3882-3a44-41e4-963b-5f602891de01-c000.json.gz | Bin 348 -> 0 bytes ...2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz | Bin 348 -> 0 bytes ...9bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz | Bin 348 -> 0 bytes ...d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz | Bin 324 -> 0 bytes ...192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz | Bin 324 -> 0 bytes ...fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz | Bin 324 -> 0 bytes ...97392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz | Bin 324 -> 0 bytes ...e3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz | Bin 324 -> 0 bytes ...8af72-f830-4638-984b-becff6aa4587-c000.json.gz | Bin 324 -> 0 bytes ...2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz | Bin 324 -> 0 bytes ...9bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz | Bin 324 -> 0 bytes .../dhp-enrichment/null/resOrg/._SUCCESS.crc | Bin 8 -> 0 bytes ...5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc | Bin 12 -> 0 bytes ...5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc | Bin 12 -> 0 bytes ...5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc | Bin 12 -> 0 bytes ...5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc | Bin 12 -> 0 bytes ...5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc | Bin 12 -> 0 bytes ...5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc | Bin 12 -> 0 bytes dhp-workflows/dhp-enrichment/null/resOrg/_SUCCESS | 0 ...35ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz | Bin 20 -> 0 bytes ...35ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz | Bin 169 -> 0 bytes ...35ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz | Bin 137 -> 0 bytes ...35ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz | Bin 184 -> 0 bytes ...35ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz | Bin 126 -> 0 bytes ...35ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz | Bin 164 -> 0 bytes 292 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 dhp-workflows/dhp-enrichment/null/leaves/._SUCCESS.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/leaves/.part-00000-366e33b9-ad38-4761-a051-bb601aecc3f0-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/leaves/_SUCCESS delete mode 100644 dhp-workflows/dhp-enrichment/null/leaves/part-00000-366e33b9-ad38-4761-a051-bb601aecc3f0-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/._SUCCESS.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-08f52c24-8e11-4513-a719-beee952de6ee-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-2dc8bfd2-49b1-48ae-951b-6febe9db8857-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-3611fa1f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-3c4369da-e550-47d3-90a8-5854c6c07c5e-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-4332f647-c748-4164-aba1-1e45fbadda99-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-810d7e27-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a127934e-6383-4d37-97e4-124125abfca5-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-beb4081a-16d5-4381-a03b-9f2ed0830707-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/_SUCCESS delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-08f52c24-8e11-4513-a719-beee952de6ee-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-2dc8bfd2-49b1-48ae-951b-6febe9db8857-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-3611fa1f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-3c4369da-e550-47d3-90a8-5854c6c07c5e-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-4332f647-c748-4164-aba1-1e45fbadda99-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-810d7e27-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a127934e-6383-4d37-97e4-124125abfca5-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-beb4081a-16d5-4381-a03b-9f2ed0830707-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00000-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00006-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00048-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00049-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00077-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00115-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00166-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00187-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/newRelation/part-00192-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/._SUCCESS.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/.part-00000-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/.part-00047-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/.part-00055-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/.part-00096-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/.part-00117-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/.part-00140-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz.crc delete mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/_SUCCESS delete mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/part-00000-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/part-00047-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/part-00055-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/part-00096-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/part-00117-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz delete mode 100644 dhp-workflows/dhp-enrichment/null/resOrg/part-00140-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz diff --git a/dhp-workflows/dhp-enrichment/null/leaves/._SUCCESS.crc b/dhp-workflows/dhp-enrichment/null/leaves/._SUCCESS.crc deleted file mode 100644 index 3b7b044936a890cd8d651d349a752d819d71d22c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8 PcmYc;N@ieSU}69O2$TUk diff --git a/dhp-workflows/dhp-enrichment/null/leaves/.part-00000-366e33b9-ad38-4761-a051-bb601aecc3f0-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/leaves/.part-00000-366e33b9-ad38-4761-a051-bb601aecc3f0-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/leaves/_SUCCESS b/dhp-workflows/dhp-enrichment/null/leaves/_SUCCESS deleted file mode 100644 index e69de29bb..000000000 diff --git a/dhp-workflows/dhp-enrichment/null/leaves/part-00000-366e33b9-ad38-4761-a051-bb601aecc3f0-c000.json.gz b/dhp-workflows/dhp-enrichment/null/leaves/part-00000-366e33b9-ad38-4761-a051-bb601aecc3f0-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/._SUCCESS.crc b/dhp-workflows/dhp-enrichment/null/newRelation/._SUCCESS.crc deleted file mode 100644 index 3b7b044936a890cd8d651d349a752d819d71d22c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8 PcmYc;N@ieSU}69O2$TUk diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-08f52c24-8e11-4513-a719-beee952de6ee-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-08f52c24-8e11-4513-a719-beee952de6ee-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-2dc8bfd2-49b1-48ae-951b-6febe9db8857-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-2dc8bfd2-49b1-48ae-951b-6febe9db8857-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-3611fa1f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-3611fa1f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-3c4369da-e550-47d3-90a8-5854c6c07c5e-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-3c4369da-e550-47d3-90a8-5854c6c07c5e-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-4332f647-c748-4164-aba1-1e45fbadda99-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-4332f647-c748-4164-aba1-1e45fbadda99-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-810d7e27-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-810d7e27-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a127934e-6383-4d37-97e4-124125abfca5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a127934e-6383-4d37-97e4-124125abfca5-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-beb4081a-16d5-4381-a03b-9f2ed0830707-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-beb4081a-16d5-4381-a03b-9f2ed0830707-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00000-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc deleted file mode 100644 index 444ebf0e34af16d32e663373a50f0161b9d62839..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A{p5&H!I5Jm#F diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00006-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc deleted file mode 100644 index 3c70b3ca0bb0105e1a1d3e2d4b87dffc34279883..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&!rRfa-5o-dC diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc deleted file mode 100644 index 9690bbf5222c463c882b01ad99b0f5b07868ab7a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc deleted file mode 100644 index 9690bbf5222c463c882b01ad99b0f5b07868ab7a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc deleted file mode 100644 index 9690bbf5222c463c882b01ad99b0f5b07868ab7a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc deleted file mode 100644 index 9690bbf5222c463c882b01ad99b0f5b07868ab7a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc deleted file mode 100644 index 9690bbf5222c463c882b01ad99b0f5b07868ab7a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc deleted file mode 100644 index 9690bbf5222c463c882b01ad99b0f5b07868ab7a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc deleted file mode 100644 index 9690bbf5222c463c882b01ad99b0f5b07868ab7a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00048-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc deleted file mode 100644 index 9690bbf5222c463c882b01ad99b0f5b07868ab7a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DgHF=YV&60`$^ diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc deleted file mode 100644 index 557a5a816fd1f6ace1abdac1bdaf029130766219..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc deleted file mode 100644 index 557a5a816fd1f6ace1abdac1bdaf029130766219..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc deleted file mode 100644 index 557a5a816fd1f6ace1abdac1bdaf029130766219..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc deleted file mode 100644 index 557a5a816fd1f6ace1abdac1bdaf029130766219..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc deleted file mode 100644 index 557a5a816fd1f6ace1abdac1bdaf029130766219..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc deleted file mode 100644 index 557a5a816fd1f6ace1abdac1bdaf029130766219..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc deleted file mode 100644 index 557a5a816fd1f6ace1abdac1bdaf029130766219..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00049-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc deleted file mode 100644 index 557a5a816fd1f6ace1abdac1bdaf029130766219..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DhNd+!|p69EJ4 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00077-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc deleted file mode 100644 index b3370486fb295f23b42e0034756726538c75d20c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Df%`Sc3_64?Xg diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc deleted file mode 100644 index ba9dc02f200333afbab6d6d41e643ee3829765cd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc deleted file mode 100644 index ba9dc02f200333afbab6d6d41e643ee3829765cd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc deleted file mode 100644 index ba9dc02f200333afbab6d6d41e643ee3829765cd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc deleted file mode 100644 index c0e5dc3160176fc74b43c74a92873202cf6f48e3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc deleted file mode 100644 index c0e5dc3160176fc74b43c74a92873202cf6f48e3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc deleted file mode 100644 index c0e5dc3160176fc74b43c74a92873202cf6f48e3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc deleted file mode 100644 index ba9dc02f200333afbab6d6d41e643ee3829765cd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc deleted file mode 100644 index c0e5dc3160176fc74b43c74a92873202cf6f48e3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc deleted file mode 100644 index c0e5dc3160176fc74b43c74a92873202cf6f48e3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc deleted file mode 100644 index c0e5dc3160176fc74b43c74a92873202cf6f48e3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc deleted file mode 100644 index ba9dc02f200333afbab6d6d41e643ee3829765cd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc deleted file mode 100644 index ba9dc02f200333afbab6d6d41e643ee3829765cd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc deleted file mode 100644 index c0e5dc3160176fc74b43c74a92873202cf6f48e3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc deleted file mode 100644 index c0e5dc3160176fc74b43c74a92873202cf6f48e3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(d`C>T$6OseN diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc deleted file mode 100644 index ba9dc02f200333afbab6d6d41e643ee3829765cd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00115-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc deleted file mode 100644 index ba9dc02f200333afbab6d6d41e643ee3829765cd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E5~FHQmg52XTE diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00166-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc deleted file mode 100644 index 2f7fbb2e7d7779ad00d319091cc6ec02c3032d6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(N^Z6bC6sQD9 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00187-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc deleted file mode 100644 index 5ea578242fce12608d5ad0076d5368e5ab1e5ce3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E?yp{fZ05-$Rt diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz.crc deleted file mode 100644 index fcf53cc925f084a3da1796db662c25862b516e87..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz.crc deleted file mode 100644 index fcf53cc925f084a3da1796db662c25862b516e87..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz.crc deleted file mode 100644 index fcf53cc925f084a3da1796db662c25862b516e87..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz.crc deleted file mode 100644 index fcf53cc925f084a3da1796db662c25862b516e87..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz.crc deleted file mode 100644 index fcf53cc925f084a3da1796db662c25862b516e87..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz.crc deleted file mode 100644 index fcf53cc925f084a3da1796db662c25862b516e87..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz.crc deleted file mode 100644 index fcf53cc925f084a3da1796db662c25862b516e87..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc b/dhp-workflows/dhp-enrichment/null/newRelation/.part-00192-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz.crc deleted file mode 100644 index fcf53cc925f084a3da1796db662c25862b516e87..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}88fsLBWc5gGzH diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/_SUCCESS b/dhp-workflows/dhp-enrichment/null/newRelation/_SUCCESS deleted file mode 100644 index e69de29bb..000000000 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-08f52c24-8e11-4513-a719-beee952de6ee-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-08f52c24-8e11-4513-a719-beee952de6ee-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-2dc8bfd2-49b1-48ae-951b-6febe9db8857-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-2dc8bfd2-49b1-48ae-951b-6febe9db8857-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-3611fa1f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-3611fa1f-4b12-4f0b-9f3b-f29a7a51b646-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-3c4369da-e550-47d3-90a8-5854c6c07c5e-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-3c4369da-e550-47d3-90a8-5854c6c07c5e-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-4332f647-c748-4164-aba1-1e45fbadda99-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-4332f647-c748-4164-aba1-1e45fbadda99-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-810d7e27-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-810d7e27-1b9a-4ecc-aafa-ceb787179fe5-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a127934e-6383-4d37-97e4-124125abfca5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a127934e-6383-4d37-97e4-124125abfca5-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-beb4081a-16d5-4381-a03b-9f2ed0830707-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-beb4081a-16d5-4381-a03b-9f2ed0830707-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00000-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz deleted file mode 100644 index 001322f84b053326cd87d0b2f1df13fddaa4da35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 Rcmb2|=3syTW+=_T000et0JZ=C diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00006-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz deleted file mode 100644 index a92a574363a35b56952a13cd4f2d141e75662967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 356 zcmV-q0h|6GiwFP!000000CiHaPQx$|-T8{hbA^)7G>Ny0sS^mXAyi%Lb7JJ$k?jwnyLUb^2@y6{ImVAnqQQu=J7}xWqZP=w0uyk-t}ml^d$f>xjEWI@ zh}v0p!FZI6M?WRbUI*Qoo63z0B9UmQFc5+!67v`)6S?y}SpAh$dBB}VL*fZ$VK zhDeC~zNzMVjs$(w7#M$hU(izKakfs70m6EUgYz33qK?xgHbT4}M!W{_^lSgwhnsu~ zyUM>M%4ytws%@SZoR>}8VpSKbyshiDST$u?Dv4dyijGP-lc(}01Do~+l>paJzfhy_l!D#w9^R3fqPj^2&;{s$#0ssJb CKdVy! diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz deleted file mode 100644 index 2298c84f35b5aa39d6f4462670bb5e978eb563d1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz deleted file mode 100644 index 2298c84f35b5aa39d6f4462670bb5e978eb563d1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz deleted file mode 100644 index 2298c84f35b5aa39d6f4462670bb5e978eb563d1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz deleted file mode 100644 index 2298c84f35b5aa39d6f4462670bb5e978eb563d1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz deleted file mode 100644 index 2298c84f35b5aa39d6f4462670bb5e978eb563d1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz deleted file mode 100644 index 2298c84f35b5aa39d6f4462670bb5e978eb563d1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz deleted file mode 100644 index 2298c84f35b5aa39d6f4462670bb5e978eb563d1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00048-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz deleted file mode 100644 index 2298c84f35b5aa39d6f4462670bb5e978eb563d1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 323 zcmV-J0lfYniwFP!000000G(1hPXjRw-uV@$%oRE+`pQ-@bpjza#NbPuyP!0w5~orr z{qHzw)hh!FqvP*kf6h^8AlP^lgggq34heO4u-;%quTV$>F5yI6Uv@F<(L?PCE_ToZ z+2F+m6HyCEF;~vsCfm7DWh8^DBsNtJQnE}E5mR;~L+ruZuc|5&ZX-GtuP_hWV*4`M zHo-JtjCJ;0QfSUH4Gl5merQ;MTZ*4ug(MbBYQQa4Q|*!DIM-VJ##|i#K%FLnS53r- z_I^1lb%_KcSxiE{e@u8O_f$q_$N*`bX$W!S6Isfa*ckD4*x@yR=bQb@93jUV_N@KZ z0X~7nmBS?Eg92>3<+Pj4mC~z)n(FSs%vVc|X3;Jl3psbMSNWTqxJ3|qER#?Fp}PlX V`R9~d*VCTvegKR-kKalH000qVk}3cI diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00049-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00049-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz deleted file mode 100644 index fe56417e97d74ab67f9276c2107723cc38754721..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 326 zcmV-M0lEGkiwFP!000000G(1jYr`-Q-TNzy=1?4ilUQ5ml&Ph3D<#O*$ri|xkxnUT zn*ZLD65@2|((!cf(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5Dy(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5Dy(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5Dy(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5Dy(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5Dy(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5Dy(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5Dy(P1@L)S&TnQ6k!EP^;cL%T{_ZStE z=mlzL*%jkKGLAlG&Ory=nptHgi!PJMv@;?EO(fLzzEq?T0lO}we5amu8x18&I?AX z1|&rJxT#Z}A_0#Y0^{!=3tGxNrO^p8h_H<~IKQ(2HO6afgm^nna0}r1ZvQlgi#~^4 zm){a(oc5n;UzSVGt4-5DyNy0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-07f487a3-f095-44a3-972a-a16a4be763dc-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-40ca0d2a-e8cd-4e7b-aca5-771b6ad3de55-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-59d06ec6-72d5-432a-bf9c-444d99f2c7db-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-7888cb4e-b25f-4d29-b277-ac25b25142cb-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-a52a9e1b-465e-4d14-905a-1c0027db70f8-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-c7870332-a058-44b8-962c-4350e0e108a7-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-e405d44c-758a-4019-a3a4-55ca75b6d857-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-ef74cb50-c002-43f9-b2e8-65c57158af28-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00077-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz deleted file mode 100644 index 5e63538ede2100ae5998b99b4d0bc4cba57fae99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 357 zcmV-r0h<0FiwFP!000000CiHaPQx$|-T8{hbA{ZLG>Ny0sS^mXAyi%Lb7SP%mF*Oj z(tqdTRt*e{=X>wnyLUb@1qs$pdnQgyp~Zx%I~u1kpp(di1{-n4ZZG@b59lEG7!`Zi zLNea5D<+^~B84Gy4knt;PAU@_WG2y2X(2{KBo;6ZHVGfL;LKN66%ls_lS#uhHbT4}_jn87>DT_F4?lz) zcE!IHs$swR)Emy{LX=J0VpY$VyshhYzHG{})C#+*l^vC8BoE!oP5xI-)JhPxI65y$ z*TV;pX=yQ%ZU@lvMX4mO8nuv()^)?>L%Hm@oVS|D2e5{I&Uov5+VkBHTh#5|N&)}? Dx;?59 diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00115-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00115-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz deleted file mode 100644 index 6f7008b990668e5684da12efdc23ea58cd6df210..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 398 zcmV;90df8xiwFP!000000PRvuYa1~Tz4uovI-8>PuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdPuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdPuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQPuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQPuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdPuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQFD@<*v~l%4Fqe~c_EKNqeVj99*pZSpi?Mh2R7nFTwiv<@6kc&2`+Zf z0~znc1rtyUNg)-^-bB;dQDr29DkL^l7Gg9^5&>hfBz@??nXjTMBW?p)7B4Ud+id$f z+E%`+pzDk^=aPJNmZ@k6KJ`Pz3fxlsYzri^SX2XUv6yOyB*(c{>Nn=%_y_7V5xi<7 zM%4FP ztvzgj#g)Y`vYBFsYB{fKoGzew(wUsvx2yb3PTV30J?6l}m0ssIHQPuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdPuJ%K>&{IwcA-7V>tTghf$dZw? zhSbe}@2q0%6j~^`1)|$%-g`6iW=>2)hFfm}lP9LpVMaYYS#L0+SIDFRmvF-_KZh8O z=%M5o1qawcw!yPACZc9C$E9$NHrdHtRjy=Eg+xV_gOn_hSj4orEJNIZw?B)jOt_Ee zNW8#2?ANmQ*RoYG6&PckeF<}@USz5~$FS_?iWDfN=-U-YBC(`q++!)KJ!X0B>qh;F zIamA#>UqOx)RYOqKM!J!S4%Kvi;2mv-!?Rq`?9u9kQvg=lIIX_eabd3XWIzz_;kRF z0e7$FU;7A4EM^P-O9%J?B(5ALDS6Yk!=xsmd(o;<)M!wRJ?B^tAUeI0m+9dm|H>yy zWr#amJKw%R_Xy6?<*d6dt9|}-`naU`dr7qdI6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&I6b;h~Z+N6C>BI>`SFm z`rkRbRZ|8Q#`C?8yLUba4Fqd%f{+KH(IKJk_SPGW=oJcSz$KiC%gZ)~9eOA|!NnGO zARD|mV%w% zi2Al&m}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-102192a9-b290-4b46-a1b2-71d347c94003-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-311fa8dc-bf3e-4d9a-934b-50ec40b310c5-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-362125c4-4bd9-46e5-891f-958f2f029ac5-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-45b5160a-ee5e-415b-9aff-34ddc7257b59-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-4de63a5e-c388-4b7c-8a8a-d39f75dc61e9-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-52897392-1ce3-49b8-b6e4-16726cef4aeb-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-57224405-f58b-4774-8782-b9e7c25f989f-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-8a8dfab3-41f0-419a-bef1-d30e052ca15a-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-9ae4c66a-fe16-4607-a98d-ae6ad338fccb-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-a8be3668-cc74-4849-a47d-1fc7646acedc-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-da88af72-f830-4638-984b-becff6aa4587-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-e1d5ac50-30a4-46a7-a736-feb6d779b206-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-f15e3882-3a44-41e4-963b-5f602891de01-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-f2b2d6cb-1513-43b5-a5f9-3c62e202eef1-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00187-fc39bc8a-c1f9-4998-95af-290f0f7a57d6-c000.json.gz deleted file mode 100644 index 6cfe02812d5b9e72b8204dbc970edbd359d679c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 348 zcmV-i0i*sOiwFP!000000CiG5Z^AGT-TNyd&!{qikl+nm}pu%sZ3;$nM9`2LX3t;EMOdL5ybRPuQ&O-oT!x`bU034{?+A+ udfBwHtyEnt<+_@qM!Bvz$9V(QN)h=0*3i!xZ=Fwje)s`+sm?P>0ssJmIi^(r diff --git a/dhp-workflows/dhp-enrichment/null/newRelation/part-00192-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz b/dhp-workflows/dhp-enrichment/null/newRelation/part-00192-044d64fe-f14a-4909-8047-ea2fb018bd87-c000.json.gz deleted file mode 100644 index 9e0f1691ec6b977af3514ba3758ef6fa23058a6b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 324 zcmV-K0lWSmiwFP!000000G(1hPs1<}-uV@g=L)5yDXq7PsS^mXA%rgWIkDu}l^>N# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxyN# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxyN# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxyN# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxyN# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxyN# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxyN# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxyN# z>3`?!R!tdL@OHlMarfCrCLzMsD#!SdNi-Nyb_Z=0db9!=S6~9p*!5=T-5xEJ9;0Fh zJw)v+yI?#@#-mS#v)4g)W>T5RAPR|0g@F(>k(kGjOyth@VD(o~JhQtfZ!kBGe zCfmxX3Y5}DpUdc~vrH8S?@~Whq(Ch}*QP)Mi3Ks>h{aS|jC`7FBYtBpj(?y|Ge)b1 zNQnA=)#f@!f<9^tjK6=(XeslQM<>VtVUyzE{MLr3W4gpfh_}NIuK_&Y?4Rdw$(OL( z`kSOJXpf(CI7hK;8(h_k_40AvG~Ig9ER~c^EgPlUxy zurM&b;nT18Fekm6U3W zus2w}=5_9Gr3bLpduAnPJ7DM7b{-K+)4#F@DK;fNZcxG_o{Q4S<2%#i)q(VheDM*NNcX|ch z{^@<-s&8QMxK4Mym0{NN7PGeCNfWvpq9Ll;QRpIsLcMz8sYqHgc)+EPfrJ5dc673PA+r0hzdHU{tq0(29u>b%7J`q7k diff --git a/dhp-workflows/dhp-enrichment/null/resOrg/part-00096-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz b/dhp-workflows/dhp-enrichment/null/resOrg/part-00096-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz deleted file mode 100644 index 7ee2b282a26fcda1d29218e128be371dd41765e5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 184 zcmV;p07w5HiwFP!000000Hw}34ni>uMbVyRq^5A3A1BT>C=o)5V@nJYUyMS)-hqOS z3d&a;y?b*Ty}IHe+`IPExV+dBz?XuVS!419oDzG=h-h6~WNEM2-PE1y_M|gCx-xh8 zgMXV{UDy3_C0jj|5}1%sl>=GmL6OWr37S@ts_@M~NJDPYVoz}pj$TcL6M*_$fp|8zcuby8TUty_0RRB=jZ(A# diff --git a/dhp-workflows/dhp-enrichment/null/resOrg/part-00117-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz b/dhp-workflows/dhp-enrichment/null/resOrg/part-00117-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz deleted file mode 100644 index 0389977c081f89118b6305eefb5a039bdd116635..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 126 zcmb2|=3sz;xu>o94jJ$;T$r0&kyc=rHz%f9u(Tk@ok>g4(b1`OcQJ$6y?s2R@a`gHR!{`DDFk;2v+i3Xdv dCV8Jq7I8aU_-e&Mz8Utr_4ge7WwQ}zEdbLjG)Djc diff --git a/dhp-workflows/dhp-enrichment/null/resOrg/part-00140-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz b/dhp-workflows/dhp-enrichment/null/resOrg/part-00140-af835ec5-a763-44f1-ad60-c02307951b8e-c000.json.gz deleted file mode 100644 index 57f1fb89852e4d92e02b9ed0fef7e3e68e292b12..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 164 zcmV;V09*ebiwFP!000000G-WC3c@fHh2go&xMK%5N%P!>LlH^t1FFSJt%6whZb4Vz zng8c|!C9VAz~jPWJ=JOMX^>kfhN@sow)sHBIg;gOG~~ph5n~5ic+}x8JDH*E!QwJr z<+6TE$91Jtuhfz>>!oqfNNi0O)M_L!lUaNFhxv1AKgrh-3f@V=E{ABGYq@hlnH{oI S^@ic_-h2RjRboHo0002)#82M< From a1cafaf2e36cc8c5de2354e723739b85aa1c95bd Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 16 Nov 2021 15:16:28 +0100 Subject: [PATCH 018/176] added mvn site for dnet-hadoop project --- dhp-build/dhp-code-style/pom.xml | 5 +++++ dhp-build/pom.xml | 11 +++++++++++ dhp-build/src/site/site.xml | 22 ++++++++++++++++++++++ dhp-common/pom.xml | 7 +++++++ dhp-workflows/pom.xml | 7 +++++++ dhp-workflows/src/site/site.xml | 25 +++++++++++++++++++++++++ pom.xml | 5 +++++ src/site/site.xml | 21 +++++++++++++++++++++ 8 files changed, 103 insertions(+) create mode 100644 dhp-build/src/site/site.xml create mode 100644 dhp-workflows/src/site/site.xml create mode 100644 src/site/site.xml diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 77aa2aedb..7a6a32e0e 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -22,6 +22,10 @@ dnet45-releases https://maven.d4science.org/nexus/content/repositories/dnet45-releases + + DHPSite + file://${dhp.site.stage.path}/site/dhp-build/dhp-code-style + @@ -43,6 +47,7 @@ UTF-8 + /tmp/dhp-site \ No newline at end of file diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index 12b999b9c..fed689a06 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -10,6 +10,9 @@ pom This module is a container for the build tools used in dnet-hadoop + + true + dhp-code-style @@ -17,4 +20,12 @@ dhp-build-properties-maven-plugin + + + + DHPSite + file://${dhp.site.stage.path}/site/dhp-build + + + diff --git a/dhp-build/src/site/site.xml b/dhp-build/src/site/site.xml new file mode 100644 index 000000000..2d9d769a2 --- /dev/null +++ b/dhp-build/src/site/site.xml @@ -0,0 +1,22 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index c057123b1..686b89f6b 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -13,6 +13,13 @@ dhp-common jar + + + DHPSite + file://${dhp.site.stage.path}/site/dhp-common + + + This module contains common utilities meant to be used across the dnet-hadoop submodules diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 22ee77619..89ba2bf70 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -15,6 +15,13 @@ This module is the container for the oozie workflow definitions in dnet-hadoop project + + + DHPSite + file://${dhp.site.stage.path}/site/dhp-workflows + + + dhp-workflow-profiles dhp-aggregation diff --git a/dhp-workflows/src/site/site.xml b/dhp-workflows/src/site/site.xml new file mode 100644 index 000000000..6b742db6a --- /dev/null +++ b/dhp-workflows/src/site/site.xml @@ -0,0 +1,25 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 71c55d1f0..f7e3c6226 100644 --- a/pom.xml +++ b/pom.xml @@ -719,6 +719,10 @@ dnet45-releases https://maven.d4science.org/nexus/content/repositories/dnet45-releases + + DHPSite + file://${dhp.site.stage.path}/site/ + @@ -734,6 +738,7 @@ + /tmp/dhp-site UTF-8 UTF-8 3.6.0 diff --git a/src/site/site.xml b/src/site/site.xml new file mode 100644 index 000000000..634a2c154 --- /dev/null +++ b/src/site/site.xml @@ -0,0 +1,21 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + \ No newline at end of file From 28ea532ecefeac266b425ce770958b0fd195a75c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 16 Nov 2021 15:24:19 +0100 Subject: [PATCH 019/176] [Affilaition Propagation] moved the selection of graph relation as a preparation step --- .../PrepareInfo.java | 17 +++++++- .../SparkResultToOrganizationFromSemRel.java | 6 +-- .../StepActions.java | 13 ++---- .../input_preparation_parameter.json | 6 +++ .../input_propagation_parameter.json | 4 +- .../oozie_app/workflow.xml | 3 +- .../PrepareInfoJobTest.java | 42 +++++++++++++++---- .../SparkJobTest.java | 8 ++-- 8 files changed, 71 insertions(+), 28 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java index d853f3858..707462f24 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java @@ -73,6 +73,9 @@ public class PrepareInfo implements Serializable { final String resultOrganizationPath = parser.get("resultOrgPath"); log.info("resultOrganizationPath: {}", resultOrganizationPath); + final String relationPath = parser.get("relationPath"); + log.info("relationPath: {}", relationPath); + SparkConf conf = new SparkConf(); conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); @@ -84,11 +87,12 @@ public class PrepareInfo implements Serializable { graphPath, childParentPath, leavesPath, - resultOrganizationPath)); + resultOrganizationPath, + relationPath)); } private static void prepareInfo(SparkSession spark, String inputPath, String childParentOrganizationPath, - String currentIterationPath, String resultOrganizationPath) { + String currentIterationPath, String resultOrganizationPath, String relationPath) { Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); relation.createOrReplaceTempView("relation"); @@ -108,6 +112,15 @@ public class PrepareInfo implements Serializable { .option("compression", "gzip") .json(resultOrganizationPath); + relation + .filter( + (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && + r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION)) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(relationPath); + Dataset children = spark .sql( "Select distinct target as child from relation where " + diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java index f5d8361d7..e26276f53 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java @@ -49,8 +49,8 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { Boolean isSparkSessionManaged = isSparkSessionManaged(parser); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String graphPath = parser.get("graphPath"); - log.info("graphPath: {}", graphPath); + String relationPath = parser.get("relationPath"); + log.info("relationPath: {}", relationPath); final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); @@ -78,7 +78,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { leavesPath, childParentPath, resultOrganizationPath, - graphPath, + relationPath, workingPath, outputPath)); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java index d190e5342..02444cb15 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java @@ -26,16 +26,11 @@ import scala.Tuple2; public class StepActions implements Serializable { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void execStep(SparkSession spark, String graphPath, String newRelationPath, String leavesPath, String chldParentOrgPath, String resultOrgPath) { - Dataset relationGraph = readPath(spark, graphPath + "/relation", Relation.class) - .filter( - (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && - r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION)); + Dataset relationGraph = readPath(spark, graphPath, Relation.class); // select only the relation source target among those proposed by propagation that are not already existent getNewRels( newRelationPath, relationGraph, @@ -80,8 +75,8 @@ public class StepActions implements Serializable { ret.setValueSet(orgs); return ret; }, Encoders.bean(KeyValueSet.class)) - .write() - .mode(SaveMode.Overwrite) + .write() + .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); } @@ -116,7 +111,7 @@ public class StepActions implements Serializable { // union of new propagation relations to the relation set // grouping from sourcetarget (we are sure the only relations are those from result to organization by // construction of the set) - // if at least one relation in the set was harvested no new relation will be returned + // if at least one relation in the set was not produced by propagation no new relation will be returned relationDataset .union(newRels) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json index baa8ba333..c79bfe05d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json @@ -34,5 +34,11 @@ "paramLongName": "isSparkSessionManaged", "paramDescription": "the path where prepared info have been stored", "paramRequired": false + }, + { + "paramName": "rep", + "paramLongName": "relationPath", + "paramDescription": "the path where to store the selected subset of relations", + "paramRequired": false } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json index bd7bb50f9..f73cc221e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json @@ -1,7 +1,7 @@ [ { - "paramName":"gp", - "paramLongName":"graphPath", + "paramName":"rep", + "paramLongName":"relationPath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true }, diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml index e62ce0f5a..17502abea 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml @@ -150,6 +150,7 @@ --leavesPath${workingDir}/preparedInfo/leavesPath --childParentPath${workingDir}/preparedInfo/childParentPath --resultOrgPath${workingDir}/preparedInfo/resultOrgPath + --relationPath${workingDir}/preparedInfo/relation @@ -173,7 +174,7 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --graphPath${sourcePath} + --relationPath${workingDir}/preparedInfo/relation --outputPath${outputPath}/relation --leavesPath${workingDir}/preparedInfo/leavesPath --childParentPath${workingDir}/preparedInfo/childParentPath diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java index 973c663b9..21d99321b 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java @@ -84,6 +84,7 @@ public class PrepareInfoJobTest { "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" }); @@ -228,6 +229,7 @@ public class PrepareInfoJobTest { "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -332,6 +334,35 @@ public class PrepareInfoJobTest { } + @Test + public void relationTest()throws Exception { + + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" + + }); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + Assertions.assertEquals(7, verificationDs.count()); + + } @Test public void resultOrganizationTest1() throws Exception { @@ -347,6 +378,7 @@ public class PrepareInfoJobTest { "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -467,10 +499,6 @@ public class PrepareInfoJobTest { @Test public void foundLeavesTest1() throws Exception { -// PrepareInfo.prepareInfo(spark, getClass() -// .getResource( -// "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") -// .getPath(), workingDir.toString() + "/childParentOrg/", workingDir.toString() + "/currentIteration/",workingDir.toString() + "/resultOrganization/"); PrepareInfo .main( @@ -484,6 +512,7 @@ public class PrepareInfoJobTest { "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -510,12 +539,9 @@ public class PrepareInfoJobTest { "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" }); -// PrepareInfo.prepareInfo(spark, getClass() -// .getResource( -// "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1") -// .getPath(), workingDir.toString() + "/childParentOrg/", workingDir.toString() + "/currentIteration/",workingDir.toString() + "/resultOrganization/"); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkJobTest.java index 5a4b19a24..7dd575b66 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkJobTest.java @@ -101,9 +101,9 @@ public class SparkJobTest { .main( new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-graphPath", graphPath, + "-relationPath", graphPath, "-hive_metastore_uris", "", - "-outputPath", workingDir.toString() + "/relation", + "-outputPath", workingDir.toString() + "/finalrelation", "-leavesPath", workingDir.toString() + "/leavesInput", "-resultOrgPath", workingDir.toString() + "/orgsInput", "-childParentPath", childParentPath, @@ -113,9 +113,11 @@ public class SparkJobTest { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") + .textFile(workingDir.toString() + "/finalrelation") .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + Assertions.assertEquals(18, tmp.count()); tmp.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType())); tmp.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType())); From 2d67020c590cd4d779e978ca8688c632c0019a57 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 16 Nov 2021 16:01:08 +0100 Subject: [PATCH 020/176] added dhp-enrichment maven site template --- .../src/site/markdown/pubmed.md | 6 ++++- .../dhp-aggregation/src/site/site.xml | 1 - .../dhp-enrichment/src/site/markdown/about.md | 1 + .../dhp-enrichment/src/site/site.xml | 26 +++++++++++++++++++ 4 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/site/markdown/about.md create mode 100644 dhp-workflows/dhp-enrichment/src/site/site.xml diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md b/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md index f6327a51b..00e3ed877 100644 --- a/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md @@ -4,7 +4,7 @@ This section describes the mapping implemented for [MEDLINE/PubMed](https://pubm Collection --------- The native data is collected from [ftp baseline](https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/) containing XML with -the following [shcema](https://www.nlm.nih.gov/bsd/licensee/elements_descriptions.html) +the following [schema](https://www.nlm.nih.gov/bsd/licensee/elements_descriptions.html) Parsing @@ -50,6 +50,10 @@ The table below describes the mapping from the XML Native to the OAF mapping |//Author/FullName| author.Forename| Concatenation of forname + lastName if exist | |FOR ALL AUTHOR | author.rank| sequential number starting from 1| +#TODO + +Missing item mapped + diff --git a/dhp-workflows/dhp-aggregation/src/site/site.xml b/dhp-workflows/dhp-aggregation/src/site/site.xml index da5da0f1e..c0a70264d 100644 --- a/dhp-workflows/dhp-aggregation/src/site/site.xml +++ b/dhp-workflows/dhp-aggregation/src/site/site.xml @@ -20,7 +20,6 @@ - diff --git a/dhp-workflows/dhp-enrichment/src/site/markdown/about.md b/dhp-workflows/dhp-enrichment/src/site/markdown/about.md new file mode 100644 index 000000000..c220c63b2 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/site/markdown/about.md @@ -0,0 +1 @@ +#DHP Enrichment \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/site/site.xml b/dhp-workflows/dhp-enrichment/src/site/site.xml new file mode 100644 index 000000000..dad0cd996 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/site/site.xml @@ -0,0 +1,26 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + + + + + + \ No newline at end of file From ec8b0219ff034ddeb3d8c878ecf40ec03337e658 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 16 Nov 2021 17:41:34 +0100 Subject: [PATCH 021/176] [Documentation] Added first page for Integration via unresolved entities generation --- .../src/site/markdown/index.md | 17 +++++++-- .../src/site/markdown/integration.md | 36 +++++++++++++++++++ .../dhp-aggregation/src/site/site.xml | 3 ++ 3 files changed, 53 insertions(+), 3 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/site/markdown/integration.md diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/index.md b/dhp-workflows/dhp-aggregation/src/site/markdown/index.md index c0c756082..240617f91 100644 --- a/dhp-workflows/dhp-aggregation/src/site/markdown/index.md +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/index.md @@ -1,9 +1,20 @@ ##DHP-Aggregation -This module defines a set of oozie workflows for the **collection** and **transformation** of metadata records. +This module defines a set of oozie workflows for -Both workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure +1. the **collection** and **transformation** of metadata records. +2. the **integration** of new external information in the result + + +### Collection and Transformation + +The workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure the consistency of the read/write operations on the data as the MdSM in fact keeps track of the logical-physical mapping of each MDStore. -It defines [mappings](mappings.md) for transformation of different datasource (See mapping section). \ No newline at end of file +It defines [mappings](mappings.md) for transformation of different datasource (See mapping section). + +### Integration of external information in the result + +The workflows create new entity in the OpenAIRE format (OAF) which aim is to enrich the result already contained in the graph. +See integration section for more insight diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md b/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md new file mode 100644 index 000000000..ef19cfd24 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md @@ -0,0 +1,36 @@ +DHP Aggregation - Integration method +===================================== + +The integration method can be applied every time new information, which is not aggregated from the repositories +nor computed directly by OpenAIRE, should be added to the results of the graph. + +The information integrated so far is: + +1. Article impact measures + 1. [Bip!Finder](https://dl.acm.org/doi/10.1145/3357384.3357850) scores +2. Result Subjects + 1. Integration of Fields od Science and Techonology ([FOS](https://www.qnrf.org/en-us/FOS)) classification in + results subjects. + + +The method always consists in the creation of a new entity in the OpenAIRE format (OAF entity) containing only the id +and the element in the OAF model that should be used to map the information we want to integrate. + +The id is set by using a particular encoding of the given PID + +*unresolved:[pid]:[pidtype]* + +where + +1 *unresolved* is a constant value +2 *pid* is the persistent id value, e.g. 10.5281/zenodo.4707307 +3 *pidtype* is the persistent id type, e.g. doi + +Such entities are matched against those available in the graph using the result.instance.pid values. + +This mechanism can be used to integrate enrichments produced as associated by a given PID. +If a match will be found with one of the results already in the graph that said result will be enriched with the information +present in the new OAF. +All the objects for which a match is not found are discarded. + + diff --git a/dhp-workflows/dhp-aggregation/src/site/site.xml b/dhp-workflows/dhp-aggregation/src/site/site.xml index c0a70264d..75fc5032e 100644 --- a/dhp-workflows/dhp-aggregation/src/site/site.xml +++ b/dhp-workflows/dhp-aggregation/src/site/site.xml @@ -19,6 +19,9 @@ + + + From 4094f2bb9ab5ad87a31b6691b8b853542d6b7685 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 17 Nov 2021 10:04:52 +0100 Subject: [PATCH 022/176] added integration md file --- .../dhp-aggregation/src/site/markdown/integration.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md b/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md index ef19cfd24..baf232e40 100644 --- a/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md @@ -22,9 +22,9 @@ The id is set by using a particular encoding of the given PID where -1 *unresolved* is a constant value -2 *pid* is the persistent id value, e.g. 10.5281/zenodo.4707307 -3 *pidtype* is the persistent id type, e.g. doi +1. *unresolved* is a constant value +2. *pid* is the persistent id value, e.g. 10.5281/zenodo.4707307 +3. *pidtype* is the persistent id type, e.g. doi Such entities are matched against those available in the graph using the result.instance.pid values. From cded363b5595ae9dd3a9cf5cdbe300a86916b0a0 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 17 Nov 2021 11:06:35 +0100 Subject: [PATCH 023/176] code refactor, created and moved scala code on the correct maven folder under src/main/scala and src/test/scala --- .../scholix/SparkCreateActionset.scala | 0 .../scholix/SparkSaveActionSet.scala | 2 +- .../dhp/collection/CollectionUtils.scala | 0 .../dhp/datacite/AbstractRestClient.scala | 0 .../dhp/datacite/DataciteAPIImporter.scala | 0 .../DataciteToOAFTransformation.scala | 0 .../GenerateDataciteDatasetSpark.scala | 3 +-- .../dnetlib/dhp/datacite/ImportDatacite.scala | 0 .../SparkDownloadUpdateDatacite.scala | 0 .../eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala | 1 + .../bio/SparkTransformBioDatabaseToOAF.scala | 12 +++++------ .../ebi/SparkCreateBaselineDataFrame.scala | 2 +- .../sx/bio/ebi/SparkDownloadEBILinks.scala | 3 +-- .../dhp/sx/bio/ebi/SparkEBILinksToOaf.scala | 5 ++--- .../dnetlib/dhp/sx/bio/pubmed/PMParser.scala | 0 .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 20 ++++++++----------- .../{actionmanager => }/datacite/record.json | 0 .../dhp/datacite/DataciteToOAFTest.scala | 0 .../dnetlib/dhp/sx/bio/BioScholixTest.scala | 0 19 files changed, 21 insertions(+), 27 deletions(-) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala (96%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/collection/CollectionUtils.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/AbstractRestClient.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala (95%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/ImportDatacite.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala (99%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala (73%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala (98%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala (98%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala (93%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala (96%) rename dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/{actionmanager => }/datacite/record.json (100%) rename dhp-workflows/dhp-aggregation/src/test/{java => scala}/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala (100%) rename dhp-workflows/dhp-aggregation/src/test/{java => scala}/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala (100%) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala index 1df7ea3fb..62d219b57 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.actionmanager.scholix import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{Oaf, Dataset => OafDataset,Publication, Software, OtherResearchProduct, Relation} +import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, Oaf, Publication, Software, OtherResearchProduct, Relation} import org.apache.hadoop.io.Text import org.apache.hadoop.io.compress.GzipCodec import org.apache.hadoop.mapred.SequenceFileOutputFormat diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala similarity index 95% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala index a63627d1c..3c8caa485 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala @@ -3,8 +3,7 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.collection.CollectionUtils.fixRelations -import eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH -import eu.dnetlib.dhp.common.Constants.MDSTORE_SIZE_PATH +import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.mdstore.{MDStoreVersion, MetadataRecord} import eu.dnetlib.dhp.schema.oaf.Oaf diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala similarity index 99% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala index 70dcc0184..853b24862 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala @@ -7,6 +7,7 @@ import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.{compact, parse, render} import collection.JavaConverters._ + object BioDBToOAF { case class EBILinkItem(id: Long, links: String) {} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala similarity index 73% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala index 8ae8285e3..fcceacd44 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala @@ -1,9 +1,9 @@ package eu.dnetlib.dhp.sx.bio import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.Oaf -import BioDBToOAF.ScholixResolved import eu.dnetlib.dhp.collection.CollectionUtils +import eu.dnetlib.dhp.schema.oaf.Oaf +import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} @@ -36,13 +36,13 @@ object SparkTransformBioDatabaseToOAF { import spark.implicits._ database.toUpperCase() match { case "UNIPROT" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "PDB" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "SCHOLIX" => - spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "CROSSREF_LINKS" => - spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 17d21f19c..660a26a6c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.oaf.Result -import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf} +import eu.dnetlib.dhp.sx.bio.pubmed._ import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala index eab6b1dc6..18e39387f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala @@ -1,9 +1,8 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal} import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem -import eu.dnetlib.dhp.sx.bio.pubmed.PMJournal +import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal} import org.apache.commons.io.IOUtils import org.apache.http.client.config.RequestConfig import org.apache.http.client.methods.HttpGet diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala similarity index 93% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala index 8da617ca0..12af4824b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala @@ -1,11 +1,10 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.collection.CollectionUtils import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.sx.bio.BioDBToOAF import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem -import BioDBToOAF.EBILinkItem -import eu.dnetlib.dhp.collection.CollectionUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -38,7 +37,7 @@ object SparkEBILinksToOaf { ebLinks.flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)) - .flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null) + .flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null) .write.mode(SaveMode.Overwrite).save(targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index ecef32202..d5d40ecfe 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -4,10 +4,9 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf._ -import scala.collection.JavaConverters._ import java.util.regex.Pattern - +import collection.JavaConverters._ /** * */ @@ -22,10 +21,10 @@ object PubMedToOaf { val collectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") - /** * Cleaning the DOI Applying regex in order to * remove doi starting with URL + * * @param doi input DOI * @return cleaned DOI */ @@ -49,7 +48,7 @@ object PubMedToOaf { * starting from OAF instanceType value * * @param cobjQualifier OAF instance type - * @param vocabularies All dnet vocabularies + * @param vocabularies All dnet vocabularies * @return the correct instance */ def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = { @@ -65,7 +64,7 @@ object PubMedToOaf { } /** - * Mapping the Pubmedjournal info into the OAF Journale + * Mapping the Pubmedjournal info into the OAF Journale * * @param j the pubmedJournal * @return the OAF Journal @@ -91,9 +90,8 @@ object PubMedToOaf { * Find vocabulary term into synonyms and term in the vocabulary * * @param vocabularyName the input vocabulary name - * @param vocabularies all the vocabularies - * @param term the term to find - * + * @param vocabularies all the vocabularies + * @param term the term to find * @return the cleaned term value */ def getVocabularyTerm(vocabularyName: String, vocabularies: VocabularyGroup, term: String): Qualifier = { @@ -104,10 +102,9 @@ object PubMedToOaf { /** - * Map the Pubmed Article into the OAF instance + * Map the Pubmed Article into the OAF instance * - * - * @param article the pubmed articles + * @param article the pubmed articles * @param vocabularies the vocabularies * @return The OAF instance if the mapping did not fail */ @@ -185,7 +182,6 @@ object PubMedToOaf { //-------------------------------------------------------------------------------------- - // RESULT MAPPING //-------------------------------------------------------------------------------------- result.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/record.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/datacite/record.json similarity index 100% rename from dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/record.json rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/datacite/record.json diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala rename to dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala rename to dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala From 2fd9ceac13d38ee4c2df06ab834de61ff8309972 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 17 Nov 2021 11:35:22 +0100 Subject: [PATCH 024/176] code refactor, created and moved scala code on the correct maven folder under src/main/scala and src/test/scala --- .../doiboost/DoiBoostMappingUtil.scala | 0 .../SparkGenerateDOIBoostActionSet.scala | 0 .../doiboost/SparkGenerateDoiBoost.scala | 0 .../doiboost/crossref/Crossref2Oaf.scala | 9 +++--- .../doiboost/crossref/CrossrefDataset.scala | 25 ++++++++-------- .../doiboost/crossref/CrossrefImporter.java | 0 .../dnetlib/doiboost/crossref/ESClient.java | 0 .../crossref/ExtractCrossrefRecords.java | 0 .../crossref/GenerateCrossrefDataset.scala | 20 +++++-------- .../crossref/SparkMapDumpIntoOAF.scala | 4 +-- .../crossref/UnpackCrtossrefEntries.scala | 9 ++---- .../dnetlib/doiboost/mag/MagDataModel.scala | 2 +- .../mag/SparkImportMagIntoDataset.scala | 7 ++--- .../doiboost/mag/SparkProcessMAG.scala | 20 +++++-------- .../dnetlib/doiboost/orcid/ORCIDToOAF.scala | 7 ++--- .../orcid/SparkConvertORCIDToOAF.scala | 8 ++--- .../doiboost/orcid/SparkPreprocessORCID.scala | 29 +++++++++---------- .../doiboost/uw/SparkMapUnpayWallToOAF.scala | 8 ++--- .../dnetlib/doiboost/uw/UnpayWallToOAF.scala | 3 +- .../doiboost/DoiBoostHostedByMapTest.scala | 2 +- .../dnetlib}/doiboost/NormalizeDoiTest.scala | 2 +- .../crossref/CrossrefMappingTest.scala | 0 .../dnetlib/doiboost/mag/MAGMappingTest.scala | 2 +- .../orcid/MappingORCIDToOAFTest.scala | 3 +- .../doiboost/uw/UnpayWallMappingTest.scala | 4 +-- 25 files changed, 69 insertions(+), 95 deletions(-) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala (99%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala (77%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/CrossrefImporter.java (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/ESClient.java (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala (73%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala (96%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala (88%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/mag/MagDataModel.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala (98%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala (91%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala (98%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala (84%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala (67%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala (80%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala (98%) rename dhp-workflows/dhp-doiboost/src/test/{java/eu/dnetlib/dhp => scala/eu/dnetlib}/doiboost/DoiBoostHostedByMapTest.scala (98%) rename dhp-workflows/dhp-doiboost/src/test/{java/eu/dnetlib/dhp => scala/eu/dnetlib}/doiboost/NormalizeDoiTest.scala (96%) rename dhp-workflows/dhp-doiboost/src/test/{java => scala}/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala (100%) rename dhp-workflows/dhp-doiboost/src/test/{java => scala}/eu/dnetlib/doiboost/mag/MAGMappingTest.scala (100%) rename dhp-workflows/dhp-doiboost/src/test/{java => scala}/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala (99%) rename dhp-workflows/dhp-doiboost/src/test/{java => scala}/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala (100%) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala similarity index 99% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 1b1c850ba..edca4a180 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -4,20 +4,19 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} import eu.dnetlib.dhp.utils.DHPUtils -import eu.dnetlib.doiboost.DoiBoostMappingUtil.{decideAccessRight, _} +import eu.dnetlib.doiboost.DoiBoostMappingUtil +import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import org.apache.commons.lang.StringUtils import org.json4s import org.json4s.DefaultFormats -import org.json4s.JsonAST.{JValue, _} +import org.json4s.JsonAST._ import org.json4s.jackson.JsonMethods._ import org.slf4j.{Logger, LoggerFactory} +import java.util import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex -import java.util - -import eu.dnetlib.doiboost.DoiBoostMappingUtil case class CrossrefDT(doi: String, json:String, timestamp: Long) {} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala similarity index 77% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala index 159b817c7..6a1c701af 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala @@ -6,7 +6,7 @@ import org.apache.commons.io.IOUtils import org.apache.hadoop.io.{IntWritable, Text} import org.apache.spark.SparkConf import org.apache.spark.sql.expressions.Aggregator -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.{Dataset, Encoder, SaveMode, SparkSession} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse @@ -17,12 +17,12 @@ object CrossrefDataset { val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) - def to_item(input:String):CrossrefDT = { + def to_item(input: String): CrossrefDT = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val ts:Long = (json \ "indexed" \ "timestamp").extract[Long] - val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) + val ts: Long = (json \ "indexed" \ "timestamp").extract[Long] + val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) CrossrefDT(doi, input, ts) } @@ -30,7 +30,6 @@ object CrossrefDataset { def main(args: Array[String]): Unit = { - val conf: SparkConf = new SparkConf() val parser = new ArgumentApplicationParser(IOUtils.toString(CrossrefDataset.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json"))) parser.parseArgument(args) @@ -54,7 +53,7 @@ object CrossrefDataset { return b - if(a.timestamp >b.timestamp) { + if (a.timestamp > b.timestamp) { return a } b @@ -66,7 +65,7 @@ object CrossrefDataset { if (a == null) return b - if(a.timestamp >b.timestamp) { + if (a.timestamp > b.timestamp) { return a } b @@ -79,20 +78,20 @@ object CrossrefDataset { override def finish(reduction: CrossrefDT): CrossrefDT = reduction } - val workingPath:String = parser.get("workingPath") + val workingPath: String = parser.get("workingPath") - val main_ds:Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT] + val main_ds: Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT] val update = - spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text]) - .map(i =>CrossrefImporter.decompressBlob(i._2.toString)) - .map(i =>to_item(i))) + spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text]) + .map(i => CrossrefImporter.decompressBlob(i._2.toString)) + .map(i => to_item(i))) main_ds.union(update).groupByKey(_.doi) .agg(crossrefAggregator.toColumn) - .map(s=>s._2) + .map(s => s._2) .write.mode(SaveMode.Overwrite).save(s"$workingPath/crossref_ds_updated") } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefImporter.java similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefImporter.java diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ESClient.java similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ESClient.java diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala similarity index 73% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala index 526ff7b3a..6d03abc25 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala @@ -2,17 +2,12 @@ package eu.dnetlib.doiboost.crossref import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.doiboost.DoiBoostMappingUtil -import eu.dnetlib.doiboost.crossref.CrossrefDataset.to_item -import eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries.getClass -import org.apache.hadoop.io.{IntWritable, Text} -import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.{SparkConf, SparkContext} import org.json4s import org.json4s.DefaultFormats -import org.json4s.JsonAST.JArray -import org.json4s.jackson.JsonMethods.{compact, parse, render} +import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} import scala.io.Source @@ -24,11 +19,10 @@ object GenerateCrossrefDataset { implicit val mrEncoder: Encoder[CrossrefDT] = Encoders.kryo[CrossrefDT] - def crossrefElement(meta: String): CrossrefDT = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(meta) - val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) + val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long] CrossrefDT(doi, meta, timestamp) @@ -51,14 +45,14 @@ object GenerateCrossrefDataset { import spark.implicits._ - val tmp : RDD[String] = sc.textFile(sourcePath,6000) + val tmp: RDD[String] = sc.textFile(sourcePath, 6000) spark.createDataset(tmp) .map(entry => crossrefElement(entry)) .write.mode(SaveMode.Overwrite).save(targetPath) -// .map(meta => crossrefElement(meta)) -// .toDS.as[CrossrefDT] -// .write.mode(SaveMode.Overwrite).save(targetPath) + // .map(meta => crossrefElement(meta)) + // .toDS.as[CrossrefDT] + // .write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala similarity index 96% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala index c65916610..fa55b9fb9 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala @@ -4,10 +4,8 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset} import org.apache.commons.io.IOUtils - import org.apache.spark.SparkConf - -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala similarity index 88% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala index 95ecb568b..191c4587e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala @@ -2,8 +2,8 @@ package eu.dnetlib.doiboost.crossref import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.hadoop.io.compress.GzipCodec +import org.apache.spark.sql.SparkSession import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.json4s import org.json4s.DefaultFormats import org.json4s.JsonAST.JArray @@ -17,9 +17,7 @@ object UnpackCrtossrefEntries { val log: Logger = LoggerFactory.getLogger(UnpackCrtossrefEntries.getClass) - - - def extractDump(input:String):List[String] = { + def extractDump(input: String): List[String] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) @@ -30,7 +28,6 @@ object UnpackCrtossrefEntries { } - def main(args: Array[String]): Unit = { val conf = new SparkConf val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json")).mkString) @@ -45,7 +42,7 @@ object UnpackCrtossrefEntries { .getOrCreate() val sc: SparkContext = spark.sparkContext - sc.wholeTextFiles(sourcePath,6000).flatMap(d =>extractDump(d._2)) + sc.wholeTextFiles(sourcePath, 6000).flatMap(d => extractDump(d._2)) .saveAsTextFile(targetPath, classOf[GzipCodec]) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala index fd9629024..0a6fa00f0 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala @@ -5,10 +5,10 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty} import eu.dnetlib.doiboost.DoiBoostMappingUtil +import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse -import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import scala.collection.JavaConverters._ import scala.collection.mutable diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala index a68d0bb2d..d25a4893f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala @@ -3,8 +3,8 @@ package eu.dnetlib.doiboost.mag import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{SaveMode, SparkSession} import org.apache.spark.sql.types._ +import org.apache.spark.sql.{SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkImportMagIntoDataset { @@ -24,13 +24,13 @@ object SparkImportMagIntoDataset { "Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Iso3166Code:string", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), "AuthorExtendedAttributes" -> Tuple2("mag/AuthorExtendedAttributes.txt", Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string")), "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), + "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), "ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), "EntityRelatedEntities" -> Tuple2("advanced/EntityRelatedEntities.txt", Seq("EntityId:long", "EntityType:string", "RelatedEntityId:long", "RelatedEntityType:string", "RelatedType:int", "Score:float")), "FieldOfStudyChildren" -> Tuple2("advanced/FieldOfStudyChildren.txt", Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")), "FieldOfStudyExtendedAttributes" -> Tuple2("advanced/FieldOfStudyExtendedAttributes.txt", Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string")), "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "CreatedDate:DateTime")), + "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), "PaperAbstractsInvertedIndex" -> Tuple2("nlp/PaperAbstractsInvertedIndex.txt.*", Seq("PaperId:long", "IndexedAbstract:string")), "PaperAuthorAffiliations" -> Tuple2("mag/PaperAuthorAffiliations.txt", Seq("PaperId:long", "AuthorId:long", "AffiliationId:long?", "AuthorSequenceNumber:uint", "OriginalAuthor:string", "OriginalAffiliation:string")), "PaperCitationContexts" -> Tuple2("nlp/PaperCitationContexts.txt", Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")), @@ -75,7 +75,6 @@ object SparkImportMagIntoDataset { .master(parser.get("master")).getOrCreate() - stream.foreach { case (k, v) => val s: StructType = getSchema(k) val df = spark.read diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala similarity index 91% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index 016279787..932725446 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -5,19 +5,16 @@ import eu.dnetlib.dhp.schema.oaf.Publication import eu.dnetlib.doiboost.DoiBoostMappingUtil import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.functions._ +import org.apache.spark.sql.functions.{col, collect_list, struct} import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} - import scala.collection.JavaConverters._ - object SparkProcessMAG { - def getDistinctResults (d:Dataset[MagPapers]):Dataset[MagPapers]={ + def getDistinctResults(d: Dataset[MagPapers]): Dataset[MagPapers] = { d.where(col("Doi").isNotNull) .groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING) - .reduceGroups((p1:MagPapers,p2:MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1,p2)) + .reduceGroups((p1: MagPapers, p2: MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1, p2)) .map(_._2)(Encoders.product[MagPapers]) .map(mp => { new MagPapers(mp.PaperId, mp.Rank, DoiBoostMappingUtil.normalizeDoi(mp.Doi), @@ -98,13 +95,13 @@ object SparkProcessMAG { var magPubs: Dataset[(String, Publication)] = spark.read.load(s"$workingPath/merge_step_2").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] + .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] val conference = spark.read.load(s"$sourcePath/ConferenceInstances") - .select($"ConferenceInstanceId".as("ci"), $"DisplayName", $"Location", $"StartDate",$"EndDate" ) + .select($"ConferenceInstanceId".as("ci"), $"DisplayName", $"Location", $"StartDate", $"EndDate") val conferenceInstance = conference.joinWith(papers, papers("ConferenceInstanceId").equalTo(conference("ci"))) - .select($"_1.ci", $"_1.DisplayName", $"_1.Location", $"_1.StartDate",$"_1.EndDate", $"_2.PaperId").as[MagConferenceInstance] + .select($"_1.ci", $"_1.DisplayName", $"_1.Location", $"_1.StartDate", $"_1.EndDate", $"_2.PaperId").as[MagConferenceInstance] magPubs.joinWith(conferenceInstance, col("_1").equalTo(conferenceInstance("PaperId")), "left") @@ -122,7 +119,7 @@ object SparkProcessMAG { magPubs.joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left") .map(item => ConversionUtil.updatePubsWithDescription(item) - ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4") + ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4") logger.info("Phase 7) Enrich Publication with FieldOfStudy") @@ -148,11 +145,10 @@ object SparkProcessMAG { spark.read.load(s"$workingPath/mag_publication").as[Publication] .filter(p => p.getId == null) .groupByKey(p => p.getId) - .reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) + .reduceGroups((a: Publication, b: Publication) => ConversionUtil.mergePublication(a, b)) .map(_._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") - } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala index 1cd3f7028..11031f9ca 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala @@ -4,17 +4,16 @@ import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication} -import eu.dnetlib.dhp.schema.orcid.{AuthorData, OrcidDOI} import eu.dnetlib.doiboost.DoiBoostMappingUtil import eu.dnetlib.doiboost.DoiBoostMappingUtil.{createSP, generateDataInfo} import org.apache.commons.lang.StringUtils -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.JavaConverters._ import org.json4s import org.json4s.DefaultFormats import org.json4s.JsonAST._ import org.json4s.jackson.JsonMethods._ +import org.slf4j.{Logger, LoggerFactory} + +import scala.collection.JavaConverters._ case class ORCIDItem(doi:String, authors:List[OrcidAuthor]){} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala similarity index 84% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala index fa4a93e00..1b189e296 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala @@ -11,10 +11,10 @@ object SparkConvertORCIDToOAF { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - def run(spark:SparkSession, workingPath:String, targetPath:String) :Unit = { + def run(spark: SparkSession, workingPath: String, targetPath: String): Unit = { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] import spark.implicits._ - val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] + val dataset: Dataset[ORCIDItem] = spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] logger.info("Converting ORCID to OAF") dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) @@ -35,8 +35,8 @@ object SparkConvertORCIDToOAF { val workingPath = parser.get("workingPath") val targetPath = parser.get("targetPath") - run(spark,workingPath, targetPath) + run(spark, workingPath, targetPath) } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala similarity index 67% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala index 31f331912..153be5dd1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala @@ -1,48 +1,45 @@ package eu.dnetlib.doiboost.orcid -import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.merge.AuthorMerger import eu.dnetlib.dhp.schema.oaf.Publication -import eu.dnetlib.dhp.schema.orcid.OrcidDOI import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.functions.{col, collect_list} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkPreprocessORCID { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - def fixORCIDItem(item :ORCIDItem):ORCIDItem = { - ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) + def fixORCIDItem(item: ORCIDItem): ORCIDItem = { + ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) } - def run(spark:SparkSession,sourcePath:String,workingPath:String):Unit = { + def run(spark: SparkSession, sourcePath: String, workingPath: String): Unit = { import spark.implicits._ implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - val inputRDD:RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s!= null).filter(s => ORCIDToOAF.authorValid(s)) + val inputRDD: RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s != null).filter(s => ORCIDToOAF.authorValid(s)) spark.createDataset(inputRDD).as[OrcidAuthor].write.mode(SaveMode.Overwrite).save(s"$workingPath/author") - val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s!= null) + val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s != null) spark.createDataset(res).as[OrcidWork].write.mode(SaveMode.Overwrite).save(s"$workingPath/works") - val authors :Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] + val authors: Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] - val works :Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] + val works: Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] works.joinWith(authors, authors("oid").equalTo(works("oid"))) - .map(i =>{ + .map(i => { val doi = i._1.doi val author = i._2 - (doi, author) - }).groupBy(col("_1").alias("doi")) + (doi, author) + }).groupBy(col("_1").alias("doi")) .agg(collect_list(col("_2")).alias("authors")).as[ORCIDItem] .map(s => fixORCIDItem(s)) .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor") @@ -67,4 +64,4 @@ object SparkPreprocessORCID { } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala similarity index 80% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala index 4530926f1..70290018d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala @@ -1,16 +1,14 @@ package eu.dnetlib.doiboost.uw import eu.dnetlib.dhp.application.ArgumentApplicationParser - import eu.dnetlib.dhp.schema.oaf.Publication import eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} - object SparkMapUnpayWallToOAF { def main(args: Array[String]): Unit = { @@ -32,11 +30,11 @@ object SparkMapUnpayWallToOAF { val sourcePath = parser.get("sourcePath") val targetPath = parser.get("targetPath") - val inputRDD:RDD[String] = spark.sparkContext.textFile(s"$sourcePath") + val inputRDD: RDD[String] = spark.sparkContext.textFile(s"$sourcePath") logger.info("Converting UnpayWall to OAF") - val d:Dataset[Publication] = spark.createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p=>p!=null)).as[Publication] + val d: Dataset[Publication] = spark.createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p => p != null)).as[Publication] d.write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala index c8324cde1..bf5694965 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala @@ -4,14 +4,13 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{AccessRight, Instance, OpenAccessRoute, Publication} import eu.dnetlib.doiboost.DoiBoostMappingUtil +import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ -import eu.dnetlib.doiboost.DoiBoostMappingUtil._ -import eu.dnetlib.doiboost.uw.UnpayWallToOAF.get_unpaywall_color diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/DoiBoostHostedByMapTest.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/DoiBoostHostedByMapTest.scala index 4912648be..049ac37f4 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/DoiBoostHostedByMapTest.scala @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.doiboost +package eu.dnetlib.doiboost import eu.dnetlib.dhp.schema.oaf.{Publication, Dataset => OafDataset} import eu.dnetlib.doiboost.{DoiBoostMappingUtil, HostedByItemType} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/NormalizeDoiTest.scala similarity index 96% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/NormalizeDoiTest.scala index a9a841ee9..bdf845f19 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/NormalizeDoiTest.scala @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.doiboost +package eu.dnetlib.doiboost import eu.dnetlib.doiboost.DoiBoostMappingUtil import org.junit.jupiter.api.Test diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/mag/MAGMappingTest.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/mag/MAGMappingTest.scala index 46d4ec08d..7403e103e 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/mag/MAGMappingTest.scala @@ -3,9 +3,9 @@ package eu.dnetlib.doiboost.mag import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, SparkSession} import org.codehaus.jackson.map.ObjectMapper +import org.json4s.DefaultFormats import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test -import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} import java.sql.Timestamp diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala similarity index 99% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala index b484dc087..a5ce6296c 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -10,9 +10,8 @@ import org.junit.jupiter.api.io.TempDir import org.slf4j.{Logger, LoggerFactory} import java.nio.file.Path -import scala.io.Source - import scala.collection.JavaConversions._ +import scala.io.Source class MappingORCIDToOAFTest { val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala index fa696fffc..012ed3da0 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala @@ -3,11 +3,11 @@ package eu.dnetlib.doiboost.uw import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.oaf.OpenAccessRoute +import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test +import org.slf4j.{Logger, LoggerFactory} import scala.io.Source -import org.junit.jupiter.api.Assertions._ -import org.slf4j.{Logger, LoggerFactory} class UnpayWallMappingTest { From 1f5ee116ed7d897831fcb74818f6c5a5cc9ee76b Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 17 Nov 2021 12:23:52 +0100 Subject: [PATCH 025/176] code refactor, created and moved scala code on the correct maven folder under src/main/scala and src/test/scala fixed test --- .../sx/graphimport/SparkDataciteToOAF.scala | 31 ---- .../oa/graph/hostedbymap/Aggregators.scala | 2 +- .../SparkApplyHostedByMapToDatasource.scala | 13 +- .../SparkApplyHostedByMapToResult.scala | 18 +-- .../SparkPrepareHostedByInfoToApply.scala | 35 ++--- .../hostedbymap/SparkProduceHostedByMap.scala | 109 +++++++------ .../raw/CopyHdfsOafSparkApplication.scala | 10 +- .../resolution/SparkResolveEntities.scala | 25 ++- .../resolution/SparkResolveRelation.scala | 2 +- .../graph/SparkConvertDatasetToJsonRDD.scala | 10 +- .../sx/graph/SparkConvertObjectToJson.scala | 10 +- .../sx/graph/SparkConvertRDDtoDataset.scala | 27 ++-- .../dhp/sx/graph/SparkCreateInputGraph.scala | 27 ++-- .../dhp/sx/graph/SparkCreateScholix.scala | 28 ++-- .../sx/graph/SparkCreateSummaryObject.scala | 12 +- .../dhp/sx/graph/pangaea/PangaeaUtils.scala | 1 + .../SparkGeneratePanagaeaDataset.scala | 18 +-- .../dhp/sx/graph/scholix/ScholixUtils.scala | 143 +++++++++--------- .../dnetlib/dhp/oa/graph/resolution/dataset | 6 +- .../oa/graph/hostedbymap/DownloadCsvTest.java | 0 .../dhp/oa/graph/hostedbymap/TestApply.scala | 0 .../oa/graph/hostedbymap/TestPrepare.scala | 4 - .../oa/graph/hostedbymap/TestPreprocess.scala | 5 +- .../resolution/ResolveEntitiesTest.scala | 1 + .../sx/graph/scholix/ScholixGraphTest.scala | 0 .../dhp/sx/pangaea/PangaeaTransformTest.scala | 1 - 26 files changed, 235 insertions(+), 303 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala (81%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala (85%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala (74%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala (61%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala (88%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala (79%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala (99%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala (69%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala (83%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala (62%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala (76%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala (76%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala (68%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala (99%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala (83%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala (61%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java (100%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala (96%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala (98%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala (99%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala (95%) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala deleted file mode 100644 index 9e905d806..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala +++ /dev/null @@ -1,31 +0,0 @@ -package eu.dnetlib.dhp.oa.sx.graphimport - -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import org.apache.commons.io.IOUtils -import org.apache.spark.SparkConf -import org.apache.spark.sql.SparkSession - -object SparkDataciteToOAF { - - - def main(args: Array[String]): Unit = { - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/ebi/datacite_to_df_params.json"))) - parser.parseArgument(args) - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - import spark.implicits._ - - - val sc = spark.sparkContext - - val inputPath = parser.get("inputPath") - - - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index ce383292c..ad4e1c96e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -1,8 +1,8 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo -import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} import org.apache.spark.sql.expressions.Aggregator +import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala similarity index 81% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index 1b18ba3ae..38af3eee4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -2,13 +2,12 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult.{applyHBtoPubs, getClass} import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.{Datasource, Publication} +import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} @@ -52,18 +51,18 @@ object SparkApplyHostedByMapToDatasource { val mapper = new ObjectMapper() - val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") + val dats: Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") .map(r => mapper.readValue(r, classOf[Datasource])) - val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath) + val pinfo: Dataset[EntityInfo] = Aggregators.datasourceToSingleId(spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) - applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) + applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) spark.read.textFile(outputPath) .write .mode(SaveMode.Overwrite) - .option("compression","gzip") + .option("compression", "gzip") .text(graphPath + "/datasource") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala similarity index 85% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 0e047d016..d360da2e9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -5,16 +5,13 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils -import eu.dnetlib.dhp.schema.oaf.{Datasource, Instance, OpenAccessRoute, Publication} +import eu.dnetlib.dhp.schema.oaf.{Instance, OpenAccessRoute, Publication} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} - import scala.collection.JavaConverters._ - - object SparkApplyHostedByMapToResult { def applyHBtoPubs(join: Dataset[EntityInfo], pubs: Dataset[Publication]) = { @@ -39,6 +36,7 @@ object SparkApplyHostedByMapToResult { p })(Encoders.bean(classOf[Publication])) } + def main(args: Array[String]): Unit = { @@ -67,18 +65,18 @@ object SparkApplyHostedByMapToResult { implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) val mapper = new ObjectMapper() - val pubs : Dataset[Publication] = spark.read.textFile(graphPath + "/publication") + val pubs: Dataset[Publication] = spark.read.textFile(graphPath + "/publication") .map(r => mapper.readValue(r, classOf[Publication])) - val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) - .map(ei => mapper.readValue(ei, classOf[EntityInfo])) + val pinfo: Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) + .map(ei => mapper.readValue(ei, classOf[EntityInfo])) - applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) + applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) spark.read.textFile(outputPath) .write .mode(SaveMode.Overwrite) - .option("compression","gzip") + .option("compression", "gzip") .text(graphPath + "/publication") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala similarity index 74% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index b7a7d352f..87e203e4b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -3,61 +3,58 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo - import eu.dnetlib.dhp.schema.oaf.{Journal, Publication} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} - - object SparkPrepareHostedByInfoToApply { implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { - var lst:List[EntityInfo] = List() + def getList(id: String, j: Journal, name: String): List[EntityInfo] = { + var lst: List[EntityInfo] = List() - if (j.getIssnLinking != null && !j.getIssnLinking.equals("")){ + if (j.getIssnLinking != null && !j.getIssnLinking.equals("")) { lst = EntityInfo.newInstance(id, j.getIssnLinking, name) :: lst } - if (j.getIssnOnline != null && !j.getIssnOnline.equals("")){ + if (j.getIssnOnline != null && !j.getIssnOnline.equals("")) { lst = EntityInfo.newInstance(id, j.getIssnOnline, name) :: lst } - if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")){ + if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")) { lst = EntityInfo.newInstance(id, j.getIssnPrinted, name) :: lst } lst } - def prepareResultInfo(spark:SparkSession, publicationPath:String) : Dataset[EntityInfo] = { + def prepareResultInfo(spark: SparkSession, publicationPath: String): Dataset[EntityInfo] = { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) val mapper = new ObjectMapper() - val dd : Dataset[Publication] = spark.read.textFile(publicationPath) + val dd: Dataset[Publication] = spark.read.textFile(publicationPath) .map(r => mapper.readValue(r, classOf[Publication])) - dd.filter(p => p.getJournal != null ).flatMap(p => getList(p.getId, p.getJournal, "")) + dd.filter(p => p.getJournal != null).flatMap(p => getList(p.getId, p.getJournal, "")) } - def toEntityInfo(input:String): EntityInfo = { + def toEntityInfo(input: String): EntityInfo = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]] + val c: Map[String, HostedByItemType] = json.extract[Map[String, HostedByItemType]] toEntityItem(c.keys.head, c.values.head) } - def toEntityItem(journal_id: String , hbi: HostedByItemType): EntityInfo = { + def toEntityItem(journal_id: String, hbi: HostedByItemType): EntityInfo = { EntityInfo.newInstance(hbi.id, journal_id, hbi.officialname, hbi.openAccess) @@ -67,7 +64,7 @@ object SparkPrepareHostedByInfoToApply { Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journalId").equalTo(hbm.col("journalId")), "left") .map(t2 => { val res: EntityInfo = t2._1 - if(t2._2 != null ){ + if (t2._2 != null) { val ds = t2._2 res.setHostedById(ds.getId) res.setOpenAccess(ds.getOpenAccess) @@ -107,10 +104,10 @@ object SparkPrepareHostedByInfoToApply { //STEP1: read the hostedbymap and transform it in EntityInfo - val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) + val hostedByInfo: Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) - //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) - val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") + //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) + val resultInfoDataset: Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") //STEP3: left join resultInfo with hostedByInfo on journal_id. Reduction of all the results with the same id in just //one entry (one result could be associated to issn and eissn and so possivly matching more than once against the map) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala similarity index 61% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index 1ee1d5d1a..6dfe35623 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -1,41 +1,39 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap +import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel} import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.commons.io.IOUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} -import com.fasterxml.jackson.databind.ObjectMapper -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.FileSystem -import org.apache.hadoop.fs.Path + import java.io.PrintWriter -import org.apache.hadoop.io.compress.GzipCodec - - object SparkProduceHostedByMap { implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) - def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = { + def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)): HostedByItemType = { val openaire: HostedByInfo = input._1._1 val doaj: HostedByInfo = input._1._2 val gold: HostedByInfo = input._2 val isOpenAccess: Boolean = doaj == null && gold == null openaire.journal_id match { - case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) - case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) - case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) + case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) + case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) + case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) // catch the default with a variable so you can print it - case whoa => null + case whoa => null } } @@ -44,7 +42,7 @@ object SparkProduceHostedByMap { implicit val formats = org.json4s.DefaultFormats - val map: Map [String, HostedByItemType] = Map (input._1 -> input._2 ) + val map: Map[String, HostedByItemType] = Map(input._1 -> input._2) Serialization.write(map) @@ -52,34 +50,33 @@ object SparkProduceHostedByMap { } - - def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { - if(issn != null){ - if(eissn != null){ - if(issnl != null){ - HostedByItemType(id, officialname, issn, eissn, issnl , oa) - }else{ - HostedByItemType(id, officialname, issn, eissn, "" , oa) + def getHostedByItemType(id: String, officialname: String, issn: String, eissn: String, issnl: String, oa: Boolean): HostedByItemType = { + if (issn != null) { + if (eissn != null) { + if (issnl != null) { + HostedByItemType(id, officialname, issn, eissn, issnl, oa) + } else { + HostedByItemType(id, officialname, issn, eissn, "", oa) } - }else{ - if(issnl != null){ - HostedByItemType(id, officialname, issn, "", issnl , oa) - }else{ - HostedByItemType(id, officialname, issn, "", "" , oa) + } else { + if (issnl != null) { + HostedByItemType(id, officialname, issn, "", issnl, oa) + } else { + HostedByItemType(id, officialname, issn, "", "", oa) } } - }else{ - if(eissn != null){ - if(issnl != null){ - HostedByItemType(id, officialname, "", eissn, issnl , oa) - }else{ - HostedByItemType(id, officialname, "", eissn, "" , oa) + } else { + if (eissn != null) { + if (issnl != null) { + HostedByItemType(id, officialname, "", eissn, issnl, oa) + } else { + HostedByItemType(id, officialname, "", eissn, "", oa) } - }else{ - if(issnl != null){ - HostedByItemType(id, officialname, "", "", issnl , oa) - }else{ - HostedByItemType("", "", "", "", "" , oa) + } else { + if (issnl != null) { + HostedByItemType(id, officialname, "", "", issnl, oa) + } else { + HostedByItemType("", "", "", "", "", oa) } } } @@ -90,10 +87,10 @@ object SparkProduceHostedByMap { return getHostedByItemType(dats.getId, dats.getOfficialname.getValue, dats.getJournal.getIssnPrinted, dats.getJournal.getIssnOnline, dats.getJournal.getIssnLinking, false) } - HostedByItemType("","","","","",false) + HostedByItemType("", "", "", "", "", false) } - def oaHostedByDataset(spark:SparkSession, datasourcePath : String) : Dataset[HostedByItemType] = { + def oaHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { import spark.implicits._ @@ -102,10 +99,10 @@ object SparkProduceHostedByMap { implicit var encoderD = Encoders.kryo[Datasource] - val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath) + val dd: Dataset[Datasource] = spark.read.textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[Datasource])) - dd.map{ddt => oaToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + dd.map { ddt => oaToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) } @@ -115,17 +112,17 @@ object SparkProduceHostedByMap { } - def goldHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { + def goldHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { import spark.implicits._ implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] val mapper = new ObjectMapper() - val dd : Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath) + val dd: Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[UnibiGoldModel])) - dd.map{ddt => goldToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + dd.map { ddt => goldToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) } @@ -134,41 +131,40 @@ object SparkProduceHostedByMap { return getHostedByItemType(Constants.DOAJ, doaj.getJournalTitle, doaj.getIssn, doaj.getEissn, "", true) } - def doajHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { + def doajHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { import spark.implicits._ implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] val mapper = new ObjectMapper() - val dd : Dataset[DOAJModel] = spark.read.textFile(datasourcePath) + val dd: Dataset[DOAJModel] = spark.read.textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[DOAJModel])) - dd.map{ddt => doajToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + dd.map { ddt => doajToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) } def toList(input: HostedByItemType): List[(String, HostedByItemType)] = { - var lst : List[(String, HostedByItemType)] = List() - if(!input.issn.equals("")){ + var lst: List[(String, HostedByItemType)] = List() + if (!input.issn.equals("")) { lst = (input.issn, input) :: lst } - if(!input.eissn.equals("")){ + if (!input.eissn.equals("")) { lst = (input.eissn, input) :: lst } - if(!input.lissn.equals("")){ + if (!input.lissn.equals("")) { lst = (input.lissn, input) :: lst } lst } - - def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode : String):Unit = { + def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode: String): Unit = { val conf = new Configuration() conf.set("fs.defaultFS", hdfsNameNode) - val fs= FileSystem.get(conf) + val fs = FileSystem.get(conf) val output = fs.create(new Path(outputPath)) val writer = new PrintWriter(output) try { @@ -182,7 +178,6 @@ object SparkProduceHostedByMap { } - def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) @@ -213,7 +208,7 @@ object SparkProduceHostedByMap { .union(doajHostedByDataset(spark, workingDirPath + "/doaj.json")) .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) .map(hbi => toHostedByMap(hbi))(Encoders.STRING) - .rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) + .rdd.saveAsTextFile(outputPath, classOf[GzipCodec]) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala similarity index 88% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala index c7ad1890d..0179cc266 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala @@ -4,20 +4,14 @@ import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.schema.common.ModelSupport -import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.utils.DHPUtils -import org.apache.commons.io.IOUtils -import org.apache.commons.lang3.StringUtils -import org.apache.http.client.methods.HttpGet -import org.apache.http.impl.client.HttpClients import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.apache.spark.{SparkConf, SparkContext} import org.slf4j.LoggerFactory -import scala.collection.JavaConverters._ import scala.io.Source - +import scala.collection.JavaConverters._ object CopyHdfsOafSparkApplication { def main(args: Array[String]): Unit = { @@ -59,7 +53,7 @@ object CopyHdfsOafSparkApplication { if (validPaths.nonEmpty) { val oaf = spark.read.load(validPaths: _*).as[Oaf] val mapper = new ObjectMapper() - val l =ModelSupport.oafTypes.entrySet.asScala.map(e => e.getKey).toList + val l = ModelSupport.oafTypes.entrySet.asScala.map(e => e.getKey).toList l.foreach( e => oaf.filter(o => o.getClass.getSimpleName.equalsIgnoreCase(e)) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala similarity index 79% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala index 316b8afed..6b4a501d6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.schema.common.EntityType import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils @@ -14,7 +13,7 @@ import org.slf4j.{Logger, LoggerFactory} object SparkResolveEntities { val mapper = new ObjectMapper() - val entities = List(EntityType.dataset,EntityType.publication, EntityType.software, EntityType.otherresearchproduct) + val entities = List(EntityType.dataset, EntityType.publication, EntityType.software, EntityType.otherresearchproduct) def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) @@ -51,10 +50,10 @@ object SparkResolveEntities { fs.rename(new Path(s"$workingPath/resolvedGraph/$e"), new Path(s"$graphBasePath/$e")) } -} + } -def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { + def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ @@ -71,22 +70,22 @@ def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: St } - def deserializeObject(input:String, entity:EntityType ) :Result = { + def deserializeObject(input: String, entity: EntityType): Result = { - entity match { - case EntityType.publication => mapper.readValue(input, classOf[Publication]) - case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) - case EntityType.software=> mapper.readValue(input, classOf[Software]) - case EntityType.otherresearchproduct=> mapper.readValue(input, classOf[OtherResearchProduct]) - } + entity match { + case EntityType.publication => mapper.readValue(input, classOf[Publication]) + case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) + case EntityType.software => mapper.readValue(input, classOf[Software]) + case EntityType.otherresearchproduct => mapper.readValue(input, classOf[OtherResearchProduct]) + } } - def generateResolvedEntities(spark:SparkSession, workingPath: String, graphBasePath:String) = { + def generateResolvedEntities(spark: SparkSession, workingPath: String, graphBasePath: String) = { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ - val re:Dataset[Result] = spark.read.load(s"$workingPath/resolvedEntities").as[Result] + val re: Dataset[Result] = spark.read.load(s"$workingPath/resolvedEntities").as[Result] entities.foreach { e => diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index cd517dd5e..c7f9b2d0e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.HdfsSupport -import eu.dnetlib.dhp.schema.oaf.{Relation, Result} +import eu.dnetlib.dhp.schema.oaf.Relation import eu.dnetlib.dhp.utils.DHPUtils import org.apache.commons.io.IOUtils import org.apache.hadoop.fs.{FileSystem, Path} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala similarity index 69% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala index 3ee0c7dd6..9d16cf907 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.Result import org.apache.commons.io.IOUtils import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf @@ -29,13 +29,13 @@ object SparkConvertDatasetToJsonRDD { val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - val resultObject = List("publication","dataset","software", "otherResearchProduct") + val resultObject = List("publication", "dataset", "software", "otherResearchProduct") val mapper = new ObjectMapper() - implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) - resultObject.foreach{item => - spark.read.load(s"$sourcePath/$item").as[Result].map(r=> mapper.writeValueAsString(r))(Encoders.STRING).rdd.saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) + resultObject.foreach { item => + spark.read.load(s"$sourcePath/$item").as[Result].map(r => mapper.writeValueAsString(r))(Encoders.STRING).rdd.saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala similarity index 83% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala index 846ac37af..cc1b97fd6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala @@ -5,10 +5,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.sx.scholix.Scholix import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import org.apache.commons.io.IOUtils +import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} -import org.apache.hadoop.io.compress._ object SparkConvertObjectToJson { @@ -32,8 +32,8 @@ object SparkConvertObjectToJson { log.info(s"objectType -> $objectType") - implicit val scholixEncoder :Encoder[Scholix]= Encoders.kryo[Scholix] - implicit val summaryEncoder :Encoder[ScholixSummary]= Encoders.kryo[ScholixSummary] + implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] + implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] val mapper = new ObjectMapper @@ -42,11 +42,11 @@ object SparkConvertObjectToJson { case "scholix" => log.info("Serialize Scholix") val d: Dataset[Scholix] = spark.read.load(sourcePath).as[Scholix] - d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(6000).saveAsTextFile(targetPath, classOf[GzipCodec]) + d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(6000).saveAsTextFile(targetPath, classOf[GzipCodec]) case "summary" => log.info("Serialize Summary") val d: Dataset[ScholixSummary] = spark.read.load(sourcePath).as[ScholixSummary] - d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(1000).saveAsTextFile(targetPath, classOf[GzipCodec]) + d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(1000).saveAsTextFile(targetPath, classOf[GzipCodec]) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala similarity index 62% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 4b82fe645..2eb5e3a35 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -2,11 +2,12 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} + object SparkConvertRDDtoDataset { def main(args: Array[String]): Unit = { @@ -31,39 +32,39 @@ object SparkConvertRDDtoDataset { val entityPath = s"$t/entities" val relPath = s"$t/relation" val mapper = new ObjectMapper() - implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) - implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) - implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) - implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) - implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) + implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) + implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) + implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) + implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) + implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) log.info("Converting dataset") - val rddDataset =spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])) + val rddDataset = spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])) spark.createDataset(rddDataset).as[OafDataset].write.mode(SaveMode.Overwrite).save(s"$entityPath/dataset") log.info("Converting publication") - val rddPublication =spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])) + val rddPublication = spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])) spark.createDataset(rddPublication).as[Publication].write.mode(SaveMode.Overwrite).save(s"$entityPath/publication") log.info("Converting software") - val rddSoftware =spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])) + val rddSoftware = spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])) spark.createDataset(rddSoftware).as[Software].write.mode(SaveMode.Overwrite).save(s"$entityPath/software") log.info("Converting otherresearchproduct") - val rddOtherResearchProduct =spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])) + val rddOtherResearchProduct = spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])) spark.createDataset(rddOtherResearchProduct).as[OtherResearchProduct].write.mode(SaveMode.Overwrite).save(s"$entityPath/otherresearchproduct") log.info("Converting Relation") - val relationSemanticFilter = List("cites", "iscitedby","merges", "ismergedin") + val relationSemanticFilter = List("cites", "iscitedby", "merges", "ismergedin") - val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation") + val rddRelation = spark.sparkContext.textFile(s"$sourcePath/relation") .map(s => mapper.readValue(s, classOf[Relation])) - .filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50")) + .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) .filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala index 350b00c5e..b6f678967 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala @@ -1,14 +1,12 @@ package eu.dnetlib.dhp.sx.graph import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} - - object SparkCreateInputGraph { def main(args: Array[String]): Unit = { @@ -33,7 +31,7 @@ object SparkCreateInputGraph { ) - implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) @@ -41,16 +39,13 @@ object SparkCreateInputGraph { implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) - - - val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - val oafDs:Dataset[Oaf] = spark.read.load(s"$sourcePath/*").as[Oaf] + val oafDs: Dataset[Oaf] = spark.read.load(s"$sourcePath/*").as[Oaf] log.info("Extract Publication") @@ -70,27 +65,27 @@ object SparkCreateInputGraph { resultObject.foreach { r => log.info(s"Make ${r._1} unique") - makeDatasetUnique(s"$targetPath/extracted/${r._1}",s"$targetPath/preprocess/${r._1}",spark, r._2) + makeDatasetUnique(s"$targetPath/extracted/${r._1}", s"$targetPath/preprocess/${r._1}", spark, r._2) } } - def extractEntities[T <: Oaf ](oafDs:Dataset[Oaf], targetPath:String, clazz:Class[T], log:Logger) :Unit = { + def extractEntities[T <: Oaf](oafDs: Dataset[Oaf], targetPath: String, clazz: Class[T], log: Logger): Unit = { - implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) + implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) log.info(s"Extract ${clazz.getSimpleName}") oafDs.filter(o => o.isInstanceOf[T]).map(p => p.asInstanceOf[T]).write.mode(SaveMode.Overwrite).save(targetPath) } - def makeDatasetUnique[T <: Result ](sourcePath:String, targetPath:String, spark:SparkSession, clazz:Class[T]) :Unit = { + def makeDatasetUnique[T <: Result](sourcePath: String, targetPath: String, spark: SparkSession, clazz: Class[T]): Unit = { import spark.implicits._ - implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) + implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) - val ds:Dataset[T] = spark.read.load(sourcePath).as[T] + val ds: Dataset[T] = spark.read.load(sourcePath).as[T] - ds.groupByKey(_.getId).reduceGroups{(x,y) => + ds.groupByKey(_.getId).reduceGroups { (x, y) => x.mergeFrom(y) x }.map(_._2).write.mode(SaveMode.Overwrite).save(targetPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala index e4fcd2782..9930c57af 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala @@ -9,7 +9,7 @@ import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils.RelatedEntities import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.functions.count -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkCreateScholix { @@ -42,7 +42,7 @@ object SparkCreateScholix { val relationDS: Dataset[(String, Relation)] = spark.read.load(relationPath).as[Relation] - .filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + .filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder)) val summaryDS: Dataset[(String, ScholixSummary)] = spark.read.load(summaryPath).as[ScholixSummary] @@ -51,54 +51,54 @@ object SparkCreateScholix { relationDS.joinWith(summaryDS, relationDS("_1").equalTo(summaryDS("_1")), "left") .map { input: ((String, Relation), (String, ScholixSummary)) => - if (input._1!= null && input._2!= null) { + if (input._1 != null && input._2 != null) { val rel: Relation = input._1._2 val source: ScholixSummary = input._2._2 (rel.getTarget, ScholixUtils.scholixFromSource(rel, source)) } - else null + else null }(Encoders.tuple(Encoders.STRING, scholixEncoder)) - .filter(r => r!= null) + .filter(r => r != null) .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_from_source") val scholixSource: Dataset[(String, Scholix)] = spark.read.load(s"$targetPath/scholix_from_source").as[(String, Scholix)](Encoders.tuple(Encoders.STRING, scholixEncoder)) scholixSource.joinWith(summaryDS, scholixSource("_1").equalTo(summaryDS("_1")), "left") .map { input: ((String, Scholix), (String, ScholixSummary)) => - if (input._2== null) { + if (input._2 == null) { null } else { val s: Scholix = input._1._2 val target: ScholixSummary = input._2._2 ScholixUtils.generateCompleteScholix(s, target) } - }.filter(s => s!= null).write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_one_verse") + }.filter(s => s != null).write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_one_verse") val scholix_o_v: Dataset[Scholix] = spark.read.load(s"$targetPath/scholix_one_verse").as[Scholix] scholix_o_v.flatMap(s => List(s, ScholixUtils.createInverseScholixRelation(s))).as[Scholix] - .map(s=> (s.getIdentifier,s))(Encoders.tuple(Encoders.STRING, scholixEncoder)) + .map(s => (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, scholixEncoder)) .groupByKey(_._1) .agg(ScholixUtils.scholixAggregator.toColumn) .map(s => s._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix") - val scholix_final:Dataset[Scholix] = spark.read.load(s"$targetPath/scholix").as[Scholix] + val scholix_final: Dataset[Scholix] = spark.read.load(s"$targetPath/scholix").as[Scholix] - val stats:Dataset[(String,String,Long)]= scholix_final.map(s => (s.getSource.getDnetIdentifier, s.getTarget.getObjectType)).groupBy("_1", "_2").agg(count("_1")).as[(String,String,Long)] + val stats: Dataset[(String, String, Long)] = scholix_final.map(s => (s.getSource.getDnetIdentifier, s.getTarget.getObjectType)).groupBy("_1", "_2").agg(count("_1")).as[(String, String, Long)] stats - .map(s => RelatedEntities(s._1, if ("dataset".equalsIgnoreCase(s._2)) s._3 else 0, if ("publication".equalsIgnoreCase(s._2)) s._3 else 0 )) + .map(s => RelatedEntities(s._1, if ("dataset".equalsIgnoreCase(s._2)) s._3 else 0, if ("publication".equalsIgnoreCase(s._2)) s._3 else 0)) .groupByKey(_.id) - .reduceGroups((a, b) => RelatedEntities(a.id, a.relatedDataset+b.relatedDataset, a.relatedPublication+b.relatedPublication)) + .reduceGroups((a, b) => RelatedEntities(a.id, a.relatedDataset + b.relatedDataset, a.relatedPublication + b.relatedPublication)) .map(_._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/related_entities") - val relatedEntitiesDS:Dataset[RelatedEntities] = spark.read.load(s"$targetPath/related_entities").as[RelatedEntities].filter(r => r.relatedPublication>0 || r.relatedDataset > 0) + val relatedEntitiesDS: Dataset[RelatedEntities] = spark.read.load(s"$targetPath/related_entities").as[RelatedEntities].filter(r => r.relatedPublication > 0 || r.relatedDataset > 0) - relatedEntitiesDS.joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner").map{i => + relatedEntitiesDS.joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner").map { i => val re = i._1 val sum = i._2._2 diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala similarity index 68% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala index 0970375f5..4274cae5a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala @@ -6,7 +6,7 @@ import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkCreateSummaryObject { @@ -28,15 +28,15 @@ object SparkCreateSummaryObject { val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result] - implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resultEncoder: Encoder[Result] = Encoders.kryo[Result] + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val summaryEncoder:Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] + implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] - val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result].filter(r=>r.getDataInfo== null || r.getDataInfo.getDeletedbyinference== false) + val ds: Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result].filter(r => r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) - ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s!= null).write.mode(SaveMode.Overwrite).save(targetPath) + ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s != null).write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala index 193512474..c70397d04 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala @@ -5,6 +5,7 @@ import org.apache.spark.sql.{Encoder, Encoders} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse + import java.util.regex.Pattern import scala.language.postfixOps import scala.xml.{Elem, Node, XML} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala similarity index 83% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala index 79c75d6df..f1a4553ea 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala @@ -2,13 +2,12 @@ package eu.dnetlib.dhp.sx.graph.pangaea import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.{SparkConf, SparkContext} import org.slf4j.{Logger, LoggerFactory} -import scala.collection.JavaConverters._ import scala.io.Source - +import scala.collection.JavaConverters._ object SparkGeneratePanagaeaDataset { @@ -28,17 +27,17 @@ object SparkGeneratePanagaeaDataset { parser.getObjectMap.asScala.foreach(s => logger.info(s"${s._1} -> ${s._2}")) logger.info("Converting sequential file into Dataset") - val sc:SparkContext = spark.sparkContext + val sc: SparkContext = spark.sparkContext - val workingPath:String = parser.get("workingPath") + val workingPath: String = parser.get("workingPath") implicit val pangaeaEncoders: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel] - val inputRDD:RDD[PangaeaDataModel] = sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s)) + val inputRDD: RDD[PangaeaDataModel] = sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s)) spark.createDataset(inputRDD).as[PangaeaDataModel] - .map(s => (s.identifier,s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders)) - .groupByKey(_._1)(Encoders.STRING) + .map(s => (s.identifier, s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders)) + .groupByKey(_._1)(Encoders.STRING) .agg(PangaeaUtils.getDatasetAggregator().toColumn) .map(s => s._2) .write.mode(SaveMode.Overwrite).save(s"$workingPath/dataset") @@ -46,7 +45,4 @@ object SparkGeneratePanagaeaDataset { } - - - } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala similarity index 61% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala index 93c554e04..7b1ddbb8f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala @@ -1,6 +1,5 @@ package eu.dnetlib.dhp.sx.graph.scholix - import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Result, StructuredProperty} import eu.dnetlib.dhp.schema.sx.scholix._ import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, Typology} @@ -11,22 +10,23 @@ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse -import scala.collection.JavaConverters._ import scala.io.Source -import scala.language.postfixOps +import scala.collection.JavaConverters._ + object ScholixUtils { val DNET_IDENTIFIER_SCHEMA: String = "DNET Identifier" - val DATE_RELATION_KEY:String = "RelationDate" - case class RelationVocabulary(original:String, inverse:String){} + val DATE_RELATION_KEY: String = "RelationDate" - case class RelatedEntities(id:String, relatedDataset:Long, relatedPublication:Long){} + case class RelationVocabulary(original: String, inverse: String) {} - val relations:Map[String, RelationVocabulary] = { - val input =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relations.json")).mkString + case class RelatedEntities(id: String, relatedDataset: Long, relatedPublication: Long) {} + + val relations: Map[String, RelationVocabulary] = { + val input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relations.json")).mkString implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) @@ -35,12 +35,12 @@ object ScholixUtils { } - def extractRelationDate(relation: Relation):String = { + def extractRelationDate(relation: Relation): String = { - if (relation.getProperties== null || !relation.getProperties.isEmpty) + if (relation.getProperties == null || !relation.getProperties.isEmpty) null else { - val date =relation.getProperties.asScala.find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey)).map(p => p.getValue) + val date = relation.getProperties.asScala.find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey)).map(p => p.getValue) if (date.isDefined) date.get else @@ -48,9 +48,9 @@ object ScholixUtils { } } - def extractRelationDate(summary: ScholixSummary):String = { + def extractRelationDate(summary: ScholixSummary): String = { - if(summary.getDate== null || summary.getDate.isEmpty) + if (summary.getDate == null || summary.getDate.isEmpty) null else { summary.getDate.get(0) @@ -59,15 +59,14 @@ object ScholixUtils { } - def inverseRelationShip(rel:ScholixRelationship):ScholixRelationship = { + def inverseRelationShip(rel: ScholixRelationship): ScholixRelationship = { new ScholixRelationship(rel.getInverse, rel.getSchema, rel.getName) } - - val statsAggregator:Aggregator[(String,String, Long), RelatedEntities, RelatedEntities] = new Aggregator[(String,String, Long), RelatedEntities, RelatedEntities] with Serializable { + val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] = new Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] with Serializable { override def zero: RelatedEntities = null override def reduce(b: RelatedEntities, a: (String, String, Long)): RelatedEntities = { @@ -78,17 +77,16 @@ object ScholixUtils { if (b == null) RelatedEntities(a._1, relatedDataset, relatedPublication) else - RelatedEntities(a._1,b.relatedDataset+ relatedDataset, b.relatedPublication+ relatedPublication ) + RelatedEntities(a._1, b.relatedDataset + relatedDataset, b.relatedPublication + relatedPublication) } override def merge(b1: RelatedEntities, b2: RelatedEntities): RelatedEntities = { - if (b1!= null && b2!= null) - RelatedEntities(b1.id, b1.relatedDataset+ b2.relatedDataset, b1.relatedPublication+ b2.relatedPublication) + if (b1 != null && b2 != null) + RelatedEntities(b1.id, b1.relatedDataset + b2.relatedDataset, b1.relatedPublication + b2.relatedPublication) + else if (b1 != null) + b1 else - if (b1!= null) - b1 - else b2 } @@ -104,12 +102,12 @@ object ScholixUtils { override def zero: Scholix = null - def scholix_complete(s:Scholix):Boolean ={ - if (s== null || s.getIdentifier==null) { + def scholix_complete(s: Scholix): Boolean = { + if (s == null || s.getIdentifier == null) { false } else if (s.getSource == null || s.getTarget == null) { - false - } + false + } else if (s.getLinkprovider == null || s.getLinkprovider.isEmpty) false else @@ -121,7 +119,7 @@ object ScholixUtils { } override def merge(b1: Scholix, b2: Scholix): Scholix = { - if (scholix_complete(b1)) b1 else b2 + if (scholix_complete(b1)) b1 else b2 } override def finish(reduction: Scholix): Scholix = reduction @@ -132,7 +130,7 @@ object ScholixUtils { } - def createInverseScholixRelation(scholix: Scholix):Scholix = { + def createInverseScholixRelation(scholix: Scholix): Scholix = { val s = new Scholix s.setPublicationDate(scholix.getPublicationDate) s.setPublisher(scholix.getPublisher) @@ -144,34 +142,33 @@ object ScholixUtils { s - } - def extractCollectedFrom(summary:ScholixSummary): List[ScholixEntityId] = { - if (summary.getDatasources!= null && !summary.getDatasources.isEmpty) { - val l: List[ScholixEntityId] = summary.getDatasources.asScala.map{ + def extractCollectedFrom(summary: ScholixSummary): List[ScholixEntityId] = { + if (summary.getDatasources != null && !summary.getDatasources.isEmpty) { + val l: List[ScholixEntityId] = summary.getDatasources.asScala.map { d => new ScholixEntityId(d.getDatasourceName, List(new ScholixIdentifier(d.getDatasourceId, "DNET Identifier", null)).asJava) }(collection.breakOut) - l + l } else List() } - def extractCollectedFrom(relation: Relation) : List[ScholixEntityId] = { + def extractCollectedFrom(relation: Relation): List[ScholixEntityId] = { if (relation.getCollectedfrom != null && !relation.getCollectedfrom.isEmpty) { val l: List[ScholixEntityId] = relation.getCollectedfrom.asScala.map { c => - new ScholixEntityId(c.getValue, List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA,null)).asJava) + new ScholixEntityId(c.getValue, List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA, null)).asJava) }(collection breakOut) l } else List() } - def generateCompleteScholix(scholix: Scholix, target:ScholixSummary): Scholix = { + def generateCompleteScholix(scholix: Scholix, target: ScholixSummary): Scholix = { val s = new Scholix s.setPublicationDate(scholix.getPublicationDate) s.setPublisher(scholix.getPublisher) @@ -192,29 +189,28 @@ object ScholixUtils { r.setObjectType(summaryObject.getTypology.toString) r.setObjectSubType(summaryObject.getSubType) - if (summaryObject.getTitle!= null && !summaryObject.getTitle.isEmpty) - r.setTitle(summaryObject.getTitle.get(0)) + if (summaryObject.getTitle != null && !summaryObject.getTitle.isEmpty) + r.setTitle(summaryObject.getTitle.get(0)) - if (summaryObject.getAuthor!= null && !summaryObject.getAuthor.isEmpty){ - val l:List[ScholixEntityId] = summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a,null)).toList + if (summaryObject.getAuthor != null && !summaryObject.getAuthor.isEmpty) { + val l: List[ScholixEntityId] = summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a, null)).toList if (l.nonEmpty) r.setCreator(l.asJava) } - if (summaryObject.getDate!= null && !summaryObject.getDate.isEmpty) + if (summaryObject.getDate != null && !summaryObject.getDate.isEmpty) r.setPublicationDate(summaryObject.getDate.get(0)) - if (summaryObject.getPublisher!= null && !summaryObject.getPublisher.isEmpty) - { - val plist:List[ScholixEntityId] =summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList + if (summaryObject.getPublisher != null && !summaryObject.getPublisher.isEmpty) { + val plist: List[ScholixEntityId] = summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList if (plist.nonEmpty) r.setPublisher(plist.asJava) } - if (summaryObject.getDatasources!= null && !summaryObject.getDatasources.isEmpty) { + if (summaryObject.getDatasources != null && !summaryObject.getDatasources.isEmpty) { - val l:List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala.map(c => new ScholixCollectedFrom( + val l: List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala.map(c => new ScholixCollectedFrom( new ScholixEntityId(c.getDatasourceName, List(new ScholixIdentifier(c.getDatasourceId, DNET_IDENTIFIER_SCHEMA, null)).asJava) , "collected", "complete" @@ -228,12 +224,9 @@ object ScholixUtils { } + def scholixFromSource(relation: Relation, source: ScholixSummary): Scholix = { - - - def scholixFromSource(relation:Relation, source:ScholixSummary):Scholix = { - - if (relation== null || source== null) + if (relation == null || source == null) return null val s = new Scholix @@ -253,9 +246,9 @@ object ScholixUtils { s.setPublicationDate(d) - if (source.getPublisher!= null && !source.getPublisher.isEmpty) { + if (source.getPublisher != null && !source.getPublisher.isEmpty) { val l: List[ScholixEntityId] = source.getPublisher.asScala - .map{ + .map { p => new ScholixEntityId(p, null) }(collection.breakOut) @@ -265,7 +258,7 @@ object ScholixUtils { } val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null) - if (semanticRelation== null) + if (semanticRelation == null) return null s.setRelationship(new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse)) s.setSource(generateScholixResourceFromSummary(source)) @@ -274,8 +267,8 @@ object ScholixUtils { } - def findURLForPID(pidValue:List[StructuredProperty], urls:List[String]):List[(StructuredProperty, String)] = { - pidValue.map{ + def findURLForPID(pidValue: List[StructuredProperty], urls: List[String]): List[(StructuredProperty, String)] = { + pidValue.map { p => val pv = p.getValue @@ -285,67 +278,67 @@ object ScholixUtils { } - def extractTypedIdentifierFromInstance(r:Result):List[ScholixIdentifier] = { + def extractTypedIdentifierFromInstance(r: Result): List[ScholixIdentifier] = { if (r.getInstance() == null || r.getInstance().isEmpty) return List() - r.getInstance().asScala.filter(i => i.getUrl!= null && !i.getUrl.isEmpty) - .filter(i => i.getPid!= null && i.getUrl != null) + r.getInstance().asScala.filter(i => i.getUrl != null && !i.getUrl.isEmpty) + .filter(i => i.getPid != null && i.getUrl != null) .flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList)) .map(i => new ScholixIdentifier(i._1.getValue, i._1.getQualifier.getClassid, i._2)).distinct.toList } - def resultToSummary(r:Result):ScholixSummary = { + def resultToSummary(r: Result): ScholixSummary = { val s = new ScholixSummary s.setId(r.getId) if (r.getPid == null || r.getPid.isEmpty) return null - val persistentIdentifiers:List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r) + val persistentIdentifiers: List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r) if (persistentIdentifiers.isEmpty) return null s.setLocalIdentifier(persistentIdentifiers.asJava) - if (r.isInstanceOf[Publication] ) + if (r.isInstanceOf[Publication]) s.setTypology(Typology.publication) else s.setTypology(Typology.dataset) s.setSubType(r.getInstance().get(0).getInstancetype.getClassname) - if (r.getTitle!= null && r.getTitle.asScala.nonEmpty) { - val titles:List[String] =r.getTitle.asScala.map(t => t.getValue)(collection breakOut) + if (r.getTitle != null && r.getTitle.asScala.nonEmpty) { + val titles: List[String] = r.getTitle.asScala.map(t => t.getValue)(collection breakOut) if (titles.nonEmpty) s.setTitle(titles.asJava) else - return null + return null } - if(r.getAuthor!= null && !r.getAuthor.isEmpty) { - val authors:List[String] = r.getAuthor.asScala.map(a=> a.getFullname)(collection breakOut) + if (r.getAuthor != null && !r.getAuthor.isEmpty) { + val authors: List[String] = r.getAuthor.asScala.map(a => a.getFullname)(collection breakOut) if (authors nonEmpty) s.setAuthor(authors.asJava) } if (r.getInstance() != null) { - val dt:List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut) + val dt: List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut) if (dt.nonEmpty) s.setDate(dt.distinct.asJava) } - if (r.getDescription!= null && !r.getDescription.isEmpty) { - val d = r.getDescription.asScala.find(f => f!= null && f.getValue!=null) + if (r.getDescription != null && !r.getDescription.isEmpty) { + val d = r.getDescription.asScala.find(f => f != null && f.getValue != null) if (d.isDefined) s.setDescription(d.get.getValue) } - if (r.getSubject!= null && !r.getSubject.isEmpty) { - val subjects:List[SchemeValue] =r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut) + if (r.getSubject != null && !r.getSubject.isEmpty) { + val subjects: List[SchemeValue] = r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut) if (subjects.nonEmpty) s.setSubject(subjects.asJava) } - if (r.getPublisher!= null) + if (r.getPublisher != null) s.setPublisher(List(r.getPublisher.getValue).asJava) - if (r.getCollectedfrom!= null && !r.getCollectedfrom.isEmpty) { - val cf:List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut) + if (r.getCollectedfrom != null && !r.getCollectedfrom.isEmpty) { + val cf: List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut) if (cf.nonEmpty) s.setDatasources(cf.distinct.asJava) } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset index 05c875148..2c73183e2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset @@ -1,3 +1,3 @@ -{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} -{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} -{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala similarity index 96% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala index a3a753a8a..7abce547f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -3,13 +3,9 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo} import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo -import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} -import javax.management.openmbean.OpenMBeanAttributeInfo import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} -import org.json4s import org.json4s.DefaultFormats -import eu.dnetlib.dhp.schema.common.ModelConstants import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.api.Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala similarity index 98% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 5b00e9b6f..0922f2e19 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -4,10 +4,9 @@ import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats -import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} -import org.junit.jupiter.api.Test -import org.junit.jupiter.api.Assertions._ import org.json4s.jackson.Serialization.write +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.Test class TestPreprocess extends java.io.Serializable{ diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index 9a142d3c0..f1bd841d1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -159,6 +159,7 @@ class ResolveEntitiesTest extends Serializable { val datDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/dataset").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) + val td = datDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala similarity index 95% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala index b90827e81..0d89cca85 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala @@ -3,7 +3,6 @@ package eu.dnetlib.dhp.sx.pangaea import eu.dnetlib.dhp.sx.graph.pangaea.PangaeaUtils import org.junit.jupiter.api.Test -import java.util.TimeZone import java.text.SimpleDateFormat import java.util.Date import scala.io.Source From 9c82d670b87219fa28d21b582b8147af957de5ea Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 17 Nov 2021 12:31:02 +0100 Subject: [PATCH 026/176] make class public in order to create javadoc --- .../eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java | 2 +- .../oa/matchers/simple/EnrichMissingPublicationDateTest.java | 2 +- .../eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java | 2 +- .../test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java | 2 +- .../eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java | 0 5 files changed, 4 insertions(+), 4 deletions(-) rename dhp-workflows/dhp-graph-mapper/src/test/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java (100%) diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java index 45bfc785f..52e9917bb 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java @@ -19,7 +19,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; @ExtendWith(MockitoExtension.class) -class UpdateMatcherTest { +public class UpdateMatcherTest { UpdateMatcher matcher = new EnrichMissingPublicationDate(); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java index 550ded9f4..5af81a31a 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java @@ -11,7 +11,7 @@ import org.junit.jupiter.api.Test; import eu.dnetlib.broker.objects.OaBrokerMainEntity; -class EnrichMissingPublicationDateTest { +public class EnrichMissingPublicationDateTest { final EnrichMissingPublicationDate matcher = new EnrichMissingPublicationDate(); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java index b532aa9f7..d93390e4a 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java @@ -8,7 +8,7 @@ import java.util.Arrays; import org.junit.jupiter.api.Test; -class SubscriptionUtilsTest { +public class SubscriptionUtilsTest { @Test void testVerifyListSimilar() { diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java index a8bc03e31..117bdeef4 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java @@ -9,7 +9,7 @@ import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerTypedValue; -class TrustUtilsTest { +public class TrustUtilsTest { private static final double THRESHOLD = 0.95; diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java From 0136a8c266c74c91c9386226f745b87de5934e87 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 18 Nov 2021 14:38:33 +0100 Subject: [PATCH 027/176] [Graph Dump] Change test to mirror that measure is at the level of the isntance --- .../java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 89b12a712..37ba96abe 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -177,8 +177,8 @@ public class DumpJobTest { Assertions .assertTrue( gr - .getInstance() - .get(0) + .getInstance() + .get(0) .getMeasures() .stream() .anyMatch( @@ -187,8 +187,8 @@ public class DumpJobTest { Assertions .assertTrue( gr - .getInstance() - .get(0) + .getInstance() + .get(0) .getMeasures() .stream() .anyMatch( From 5dc5792722f7ec2c633f9a268105c859685ae02f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 18 Nov 2021 14:39:12 +0100 Subject: [PATCH 028/176] [Graph Dump] Change test resource to mirror the movement of the measure element --- .../oa/graph/dump/addProjectInfo/publication_extendedmodel | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel index 6b146405a..979cbf168 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel @@ -1,2 +1,2 @@ -{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":null},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} -{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":null},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} \ No newline at end of file +{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} +{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} \ No newline at end of file From bd9a43cefdd54b51a4cc542a3b8cd87696d59003 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 19 Nov 2021 09:20:43 +0100 Subject: [PATCH 029/176] Revert to 4094f2bb9ab5ad87a31b6691b8b853542d6b7685 --- .../oaf/utils/GraphCleaningFunctions.java | 4 +- .../scholix/SparkCreateActionset.scala | 0 .../scholix/SparkSaveActionSet.scala | 2 +- .../dhp/collection/CollectionUtils.scala | 0 .../dhp/datacite/AbstractRestClient.scala | 0 .../dhp/datacite/DataciteAPIImporter.scala | 0 .../DataciteToOAFTransformation.scala | 0 .../GenerateDataciteDatasetSpark.scala | 3 +- .../dnetlib/dhp/datacite/ImportDatacite.scala | 0 .../SparkDownloadUpdateDatacite.scala | 0 .../eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala | 1 - .../bio/SparkTransformBioDatabaseToOAF.scala | 12 +- .../ebi/SparkCreateBaselineDataFrame.scala | 2 +- .../sx/bio/ebi/SparkDownloadEBILinks.scala | 3 +- .../dhp/sx/bio/ebi/SparkEBILinksToOaf.scala | 5 +- .../dnetlib/dhp/sx/bio/pubmed/PMParser.scala | 0 .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 20 ++- .../dhp/datacite/DataciteToOAFTest.scala | 0 .../dnetlib/dhp/sx/bio/BioScholixTest.scala | 0 .../{ => actionmanager}/datacite/record.json | 0 .../broker/oa/matchers/UpdateMatcherTest.java | 2 +- .../EnrichMissingPublicationDateTest.java | 2 +- .../broker/oa/util/SubscriptionUtilsTest.java | 2 +- .../dhp/broker/oa/util/TrustUtilsTest.java | 2 +- .../doiboost/DoiBoostMappingUtil.scala | 0 .../SparkGenerateDOIBoostActionSet.scala | 0 .../doiboost/SparkGenerateDoiBoost.scala | 0 .../doiboost/crossref/Crossref2Oaf.scala | 9 +- .../doiboost/crossref/CrossrefDataset.scala | 25 +-- .../doiboost/crossref/CrossrefImporter.java | 0 .../dnetlib/doiboost/crossref/ESClient.java | 0 .../crossref/ExtractCrossrefRecords.java | 0 .../crossref/GenerateCrossrefDataset.scala | 20 ++- .../crossref/SparkMapDumpIntoOAF.scala | 4 +- .../crossref/UnpackCrtossrefEntries.scala | 9 +- .../dnetlib/doiboost/mag/MagDataModel.scala | 2 +- .../mag/SparkImportMagIntoDataset.scala | 7 +- .../doiboost/mag/SparkProcessMAG.scala | 20 ++- .../dnetlib/doiboost/orcid/ORCIDToOAF.scala | 7 +- .../orcid/SparkConvertORCIDToOAF.scala | 8 +- .../doiboost/orcid/SparkPreprocessORCID.scala | 29 ++-- .../doiboost/uw/SparkMapUnpayWallToOAF.scala | 8 +- .../dnetlib/doiboost/uw/UnpayWallToOAF.scala | 3 +- .../doiboost/DoiBoostHostedByMapTest.scala | 2 +- .../dhp}/doiboost/NormalizeDoiTest.scala | 2 +- .../crossref/CrossrefMappingTest.scala | 0 .../dnetlib/doiboost/mag/MAGMappingTest.scala | 2 +- .../orcid/MappingORCIDToOAFTest.scala | 3 +- .../doiboost/uw/UnpayWallMappingTest.scala | 4 +- .../oa/graph/hostedbymap/Aggregators.scala | 2 +- .../SparkApplyHostedByMapToDatasource.scala | 13 +- .../SparkApplyHostedByMapToResult.scala | 18 ++- .../SparkPrepareHostedByInfoToApply.scala | 35 +++-- .../hostedbymap/SparkProduceHostedByMap.scala | 109 ++++++------- .../raw/CopyHdfsOafSparkApplication.scala | 10 +- .../resolution/SparkResolveEntities.scala | 25 +-- .../resolution/SparkResolveRelation.scala | 2 +- .../sx/graphimport/SparkDataciteToOAF.scala | 31 ++++ .../graph/SparkConvertDatasetToJsonRDD.scala | 10 +- .../sx/graph/SparkConvertObjectToJson.scala | 10 +- .../sx/graph/SparkConvertRDDtoDataset.scala | 27 ++-- .../dhp/sx/graph/SparkCreateInputGraph.scala | 27 ++-- .../dhp/sx/graph/SparkCreateScholix.scala | 28 ++-- .../sx/graph/SparkCreateSummaryObject.scala | 12 +- .../dhp/sx/graph/pangaea/PangaeaUtils.scala | 1 - .../SparkGeneratePanagaeaDataset.scala | 18 ++- .../dhp/sx/graph/scholix/ScholixUtils.scala | 143 +++++++++--------- .../dhp/oa/graph/hostedbymap/TestApply.scala | 0 .../oa/graph/hostedbymap/TestPrepare.scala | 4 + .../oa/graph/hostedbymap/TestPreprocess.scala | 5 +- .../resolution/ResolveEntitiesTest.scala | 1 - .../sx/graph/scholix/ScholixGraphTest.scala | 0 .../dhp/sx/pangaea/PangaeaTransformTest.scala | 1 + .../dnetlib/dhp/oa/graph/resolution/dataset | 6 +- 74 files changed, 431 insertions(+), 331 deletions(-) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala (96%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/collection/CollectionUtils.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/datacite/AbstractRestClient.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala (95%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/datacite/ImportDatacite.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala (99%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala (73%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala (98%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala (98%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala (93%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala (96%) rename dhp-workflows/dhp-aggregation/src/test/{scala => java}/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala (100%) rename dhp-workflows/dhp-aggregation/src/test/{scala => java}/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala (100%) rename dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/{ => actionmanager}/datacite/record.json (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala (99%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala (77%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/CrossrefImporter.java (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/ESClient.java (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala (73%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala (96%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala (88%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/mag/MagDataModel.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala (98%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala (91%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala (98%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala (84%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala (67%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala (80%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala (98%) rename dhp-workflows/dhp-doiboost/src/test/{scala/eu/dnetlib => java/eu/dnetlib/dhp}/doiboost/DoiBoostHostedByMapTest.scala (98%) rename dhp-workflows/dhp-doiboost/src/test/{scala/eu/dnetlib => java/eu/dnetlib/dhp}/doiboost/NormalizeDoiTest.scala (96%) rename dhp-workflows/dhp-doiboost/src/test/{scala => java}/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala (100%) rename dhp-workflows/dhp-doiboost/src/test/{scala => java}/eu/dnetlib/doiboost/mag/MAGMappingTest.scala (100%) rename dhp-workflows/dhp-doiboost/src/test/{scala => java}/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala (99%) rename dhp-workflows/dhp-doiboost/src/test/{scala => java}/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala (81%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala (85%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala (74%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala (61%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala (88%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala (79%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala (99%) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala (69%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala (83%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala (62%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala (76%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala (76%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala (68%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala (99%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala (83%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala (61%) rename dhp-workflows/dhp-graph-mapper/src/test/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/test/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala (96%) rename dhp-workflows/dhp-graph-mapper/src/test/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala (98%) rename dhp-workflows/dhp-graph-mapper/src/test/{scala => java}/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala (99%) rename dhp-workflows/dhp-graph-mapper/src/test/{scala => java}/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/test/{scala => java}/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala (95%) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 43413b311..d8b1cded8 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -27,8 +27,8 @@ public class GraphCleaningFunctions extends CleaningFunctions { public static final int ORCID_LEN = 19; public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)"; public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*"; - public static final String TITLE_FILTER_REGEX = "(test)|\\W|\\d"; - public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5; + public static final String TITLE_FILTER_REGEX = "[.*test.*\\W\\d]"; + public static final int TITLE_FILTER_RESIDUAL_LENGTH = 10; public static T fixVocabularyNames(T value) { if (value instanceof Datasource) { diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala index 62d219b57..1df7ea3fb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.actionmanager.scholix import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, Oaf, Publication, Software, OtherResearchProduct, Relation} +import eu.dnetlib.dhp.schema.oaf.{Oaf, Dataset => OafDataset,Publication, Software, OtherResearchProduct, Relation} import org.apache.hadoop.io.Text import org.apache.hadoop.io.compress.GzipCodec import org.apache.hadoop.mapred.SequenceFileOutputFormat diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala similarity index 95% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala index 3c8caa485..a63627d1c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala @@ -3,7 +3,8 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.collection.CollectionUtils.fixRelations -import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} +import eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH +import eu.dnetlib.dhp.common.Constants.MDSTORE_SIZE_PATH import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.mdstore.{MDStoreVersion, MetadataRecord} import eu.dnetlib.dhp.schema.oaf.Oaf diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala similarity index 99% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala index 853b24862..70dcc0184 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala @@ -7,7 +7,6 @@ import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.{compact, parse, render} import collection.JavaConverters._ - object BioDBToOAF { case class EBILinkItem(id: Long, links: String) {} diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala similarity index 73% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala index fcceacd44..8ae8285e3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala @@ -1,9 +1,9 @@ package eu.dnetlib.dhp.sx.bio import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.collection.CollectionUtils import eu.dnetlib.dhp.schema.oaf.Oaf -import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved +import BioDBToOAF.ScholixResolved +import eu.dnetlib.dhp.collection.CollectionUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} @@ -36,13 +36,13 @@ object SparkTransformBioDatabaseToOAF { import spark.implicits._ database.toUpperCase() match { case "UNIPROT" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "PDB" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "SCHOLIX" => - spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "CROSSREF_LINKS" => - spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 660a26a6c..17d21f19c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.oaf.Result -import eu.dnetlib.dhp.sx.bio.pubmed._ +import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf} import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala index 18e39387f..eab6b1dc6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala @@ -1,8 +1,9 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal} +import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem +import eu.dnetlib.dhp.sx.bio.pubmed.PMJournal import org.apache.commons.io.IOUtils import org.apache.http.client.config.RequestConfig import org.apache.http.client.methods.HttpGet diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala similarity index 93% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala index 12af4824b..8da617ca0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala @@ -1,10 +1,11 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.collection.CollectionUtils import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.sx.bio.BioDBToOAF import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem +import BioDBToOAF.EBILinkItem +import eu.dnetlib.dhp.collection.CollectionUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -37,7 +38,7 @@ object SparkEBILinksToOaf { ebLinks.flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)) - .flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null) + .flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null) .write.mode(SaveMode.Overwrite).save(targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index d5d40ecfe..ecef32202 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -4,9 +4,10 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf._ +import scala.collection.JavaConverters._ import java.util.regex.Pattern -import collection.JavaConverters._ + /** * */ @@ -21,10 +22,10 @@ object PubMedToOaf { val collectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") + /** * Cleaning the DOI Applying regex in order to * remove doi starting with URL - * * @param doi input DOI * @return cleaned DOI */ @@ -48,7 +49,7 @@ object PubMedToOaf { * starting from OAF instanceType value * * @param cobjQualifier OAF instance type - * @param vocabularies All dnet vocabularies + * @param vocabularies All dnet vocabularies * @return the correct instance */ def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = { @@ -64,7 +65,7 @@ object PubMedToOaf { } /** - * Mapping the Pubmedjournal info into the OAF Journale + * Mapping the Pubmedjournal info into the OAF Journale * * @param j the pubmedJournal * @return the OAF Journal @@ -90,8 +91,9 @@ object PubMedToOaf { * Find vocabulary term into synonyms and term in the vocabulary * * @param vocabularyName the input vocabulary name - * @param vocabularies all the vocabularies - * @param term the term to find + * @param vocabularies all the vocabularies + * @param term the term to find + * * @return the cleaned term value */ def getVocabularyTerm(vocabularyName: String, vocabularies: VocabularyGroup, term: String): Qualifier = { @@ -102,9 +104,10 @@ object PubMedToOaf { /** - * Map the Pubmed Article into the OAF instance + * Map the Pubmed Article into the OAF instance * - * @param article the pubmed articles + * + * @param article the pubmed articles * @param vocabularies the vocabularies * @return The OAF instance if the mapping did not fail */ @@ -182,6 +185,7 @@ object PubMedToOaf { //-------------------------------------------------------------------------------------- + // RESULT MAPPING //-------------------------------------------------------------------------------------- result.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)) diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala rename to dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala rename to dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/datacite/record.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/record.json similarity index 100% rename from dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/datacite/record.json rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/record.json diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java index 52e9917bb..45bfc785f 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java @@ -19,7 +19,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; @ExtendWith(MockitoExtension.class) -public class UpdateMatcherTest { +class UpdateMatcherTest { UpdateMatcher matcher = new EnrichMissingPublicationDate(); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java index 5af81a31a..550ded9f4 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java @@ -11,7 +11,7 @@ import org.junit.jupiter.api.Test; import eu.dnetlib.broker.objects.OaBrokerMainEntity; -public class EnrichMissingPublicationDateTest { +class EnrichMissingPublicationDateTest { final EnrichMissingPublicationDate matcher = new EnrichMissingPublicationDate(); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java index d93390e4a..b532aa9f7 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java @@ -8,7 +8,7 @@ import java.util.Arrays; import org.junit.jupiter.api.Test; -public class SubscriptionUtilsTest { +class SubscriptionUtilsTest { @Test void testVerifyListSimilar() { diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java index 117bdeef4..a8bc03e31 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java @@ -9,7 +9,7 @@ import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerTypedValue; -public class TrustUtilsTest { +class TrustUtilsTest { private static final double THRESHOLD = 0.95; diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala similarity index 99% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index edca4a180..1b1c850ba 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -4,19 +4,20 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} import eu.dnetlib.dhp.utils.DHPUtils -import eu.dnetlib.doiboost.DoiBoostMappingUtil -import eu.dnetlib.doiboost.DoiBoostMappingUtil._ +import eu.dnetlib.doiboost.DoiBoostMappingUtil.{decideAccessRight, _} import org.apache.commons.lang.StringUtils import org.json4s import org.json4s.DefaultFormats -import org.json4s.JsonAST._ +import org.json4s.JsonAST.{JValue, _} import org.json4s.jackson.JsonMethods._ import org.slf4j.{Logger, LoggerFactory} -import java.util import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex +import java.util + +import eu.dnetlib.doiboost.DoiBoostMappingUtil case class CrossrefDT(doi: String, json:String, timestamp: Long) {} diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala similarity index 77% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala index 6a1c701af..159b817c7 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala @@ -6,7 +6,7 @@ import org.apache.commons.io.IOUtils import org.apache.hadoop.io.{IntWritable, Text} import org.apache.spark.SparkConf import org.apache.spark.sql.expressions.Aggregator -import org.apache.spark.sql.{Dataset, Encoder, SaveMode, SparkSession} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse @@ -17,12 +17,12 @@ object CrossrefDataset { val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) - def to_item(input: String): CrossrefDT = { + def to_item(input:String):CrossrefDT = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val ts: Long = (json \ "indexed" \ "timestamp").extract[Long] - val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) + val ts:Long = (json \ "indexed" \ "timestamp").extract[Long] + val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) CrossrefDT(doi, input, ts) } @@ -30,6 +30,7 @@ object CrossrefDataset { def main(args: Array[String]): Unit = { + val conf: SparkConf = new SparkConf() val parser = new ArgumentApplicationParser(IOUtils.toString(CrossrefDataset.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json"))) parser.parseArgument(args) @@ -53,7 +54,7 @@ object CrossrefDataset { return b - if (a.timestamp > b.timestamp) { + if(a.timestamp >b.timestamp) { return a } b @@ -65,7 +66,7 @@ object CrossrefDataset { if (a == null) return b - if (a.timestamp > b.timestamp) { + if(a.timestamp >b.timestamp) { return a } b @@ -78,20 +79,20 @@ object CrossrefDataset { override def finish(reduction: CrossrefDT): CrossrefDT = reduction } - val workingPath: String = parser.get("workingPath") + val workingPath:String = parser.get("workingPath") - val main_ds: Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT] + val main_ds:Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT] val update = - spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text]) - .map(i => CrossrefImporter.decompressBlob(i._2.toString)) - .map(i => to_item(i))) + spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text]) + .map(i =>CrossrefImporter.decompressBlob(i._2.toString)) + .map(i =>to_item(i))) main_ds.union(update).groupByKey(_.doi) .agg(crossrefAggregator.toColumn) - .map(s => s._2) + .map(s=>s._2) .write.mode(SaveMode.Overwrite).save(s"$workingPath/crossref_ds_updated") } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefImporter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefImporter.java rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ESClient.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ESClient.java rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala similarity index 73% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala index 6d03abc25..526ff7b3a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala @@ -2,12 +2,17 @@ package eu.dnetlib.doiboost.crossref import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.doiboost.DoiBoostMappingUtil +import eu.dnetlib.doiboost.crossref.CrossrefDataset.to_item +import eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries.getClass +import org.apache.hadoop.io.{IntWritable, Text} +import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.json4s import org.json4s.DefaultFormats -import org.json4s.jackson.JsonMethods.parse +import org.json4s.JsonAST.JArray +import org.json4s.jackson.JsonMethods.{compact, parse, render} import org.slf4j.{Logger, LoggerFactory} import scala.io.Source @@ -19,10 +24,11 @@ object GenerateCrossrefDataset { implicit val mrEncoder: Encoder[CrossrefDT] = Encoders.kryo[CrossrefDT] + def crossrefElement(meta: String): CrossrefDT = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(meta) - val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) + val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long] CrossrefDT(doi, meta, timestamp) @@ -45,14 +51,14 @@ object GenerateCrossrefDataset { import spark.implicits._ - val tmp: RDD[String] = sc.textFile(sourcePath, 6000) + val tmp : RDD[String] = sc.textFile(sourcePath,6000) spark.createDataset(tmp) .map(entry => crossrefElement(entry)) .write.mode(SaveMode.Overwrite).save(targetPath) - // .map(meta => crossrefElement(meta)) - // .toDS.as[CrossrefDT] - // .write.mode(SaveMode.Overwrite).save(targetPath) +// .map(meta => crossrefElement(meta)) +// .toDS.as[CrossrefDT] +// .write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala similarity index 96% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala index fa55b9fb9..c65916610 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala @@ -4,8 +4,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset} import org.apache.commons.io.IOUtils + import org.apache.spark.SparkConf -import org.apache.spark.sql._ + +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala similarity index 88% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala index 191c4587e..95ecb568b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala @@ -2,8 +2,8 @@ package eu.dnetlib.doiboost.crossref import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.hadoop.io.compress.GzipCodec -import org.apache.spark.sql.SparkSession import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.json4s import org.json4s.DefaultFormats import org.json4s.JsonAST.JArray @@ -17,7 +17,9 @@ object UnpackCrtossrefEntries { val log: Logger = LoggerFactory.getLogger(UnpackCrtossrefEntries.getClass) - def extractDump(input: String): List[String] = { + + + def extractDump(input:String):List[String] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) @@ -28,6 +30,7 @@ object UnpackCrtossrefEntries { } + def main(args: Array[String]): Unit = { val conf = new SparkConf val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json")).mkString) @@ -42,7 +45,7 @@ object UnpackCrtossrefEntries { .getOrCreate() val sc: SparkContext = spark.sparkContext - sc.wholeTextFiles(sourcePath, 6000).flatMap(d => extractDump(d._2)) + sc.wholeTextFiles(sourcePath,6000).flatMap(d =>extractDump(d._2)) .saveAsTextFile(targetPath, classOf[GzipCodec]) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala index 0a6fa00f0..fd9629024 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala @@ -5,10 +5,10 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty} import eu.dnetlib.doiboost.DoiBoostMappingUtil -import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse +import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import scala.collection.JavaConverters._ import scala.collection.mutable diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala index d25a4893f..a68d0bb2d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala @@ -3,8 +3,8 @@ package eu.dnetlib.doiboost.mag import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.types._ import org.apache.spark.sql.{SaveMode, SparkSession} +import org.apache.spark.sql.types._ import org.slf4j.{Logger, LoggerFactory} object SparkImportMagIntoDataset { @@ -24,13 +24,13 @@ object SparkImportMagIntoDataset { "Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Iso3166Code:string", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), "AuthorExtendedAttributes" -> Tuple2("mag/AuthorExtendedAttributes.txt", Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string")), "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), + "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), "ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), "EntityRelatedEntities" -> Tuple2("advanced/EntityRelatedEntities.txt", Seq("EntityId:long", "EntityType:string", "RelatedEntityId:long", "RelatedEntityType:string", "RelatedType:int", "Score:float")), "FieldOfStudyChildren" -> Tuple2("advanced/FieldOfStudyChildren.txt", Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")), "FieldOfStudyExtendedAttributes" -> Tuple2("advanced/FieldOfStudyExtendedAttributes.txt", Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string")), "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), + "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "CreatedDate:DateTime")), "PaperAbstractsInvertedIndex" -> Tuple2("nlp/PaperAbstractsInvertedIndex.txt.*", Seq("PaperId:long", "IndexedAbstract:string")), "PaperAuthorAffiliations" -> Tuple2("mag/PaperAuthorAffiliations.txt", Seq("PaperId:long", "AuthorId:long", "AffiliationId:long?", "AuthorSequenceNumber:uint", "OriginalAuthor:string", "OriginalAffiliation:string")), "PaperCitationContexts" -> Tuple2("nlp/PaperCitationContexts.txt", Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")), @@ -75,6 +75,7 @@ object SparkImportMagIntoDataset { .master(parser.get("master")).getOrCreate() + stream.foreach { case (k, v) => val s: StructType = getSchema(k) val df = spark.read diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala similarity index 91% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index 932725446..016279787 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -5,16 +5,19 @@ import eu.dnetlib.dhp.schema.oaf.Publication import eu.dnetlib.doiboost.DoiBoostMappingUtil import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.functions.{col, collect_list, struct} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.functions._ import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} + import scala.collection.JavaConverters._ + object SparkProcessMAG { - def getDistinctResults(d: Dataset[MagPapers]): Dataset[MagPapers] = { + def getDistinctResults (d:Dataset[MagPapers]):Dataset[MagPapers]={ d.where(col("Doi").isNotNull) .groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING) - .reduceGroups((p1: MagPapers, p2: MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1, p2)) + .reduceGroups((p1:MagPapers,p2:MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1,p2)) .map(_._2)(Encoders.product[MagPapers]) .map(mp => { new MagPapers(mp.PaperId, mp.Rank, DoiBoostMappingUtil.normalizeDoi(mp.Doi), @@ -95,13 +98,13 @@ object SparkProcessMAG { var magPubs: Dataset[(String, Publication)] = spark.read.load(s"$workingPath/merge_step_2").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] + .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] val conference = spark.read.load(s"$sourcePath/ConferenceInstances") - .select($"ConferenceInstanceId".as("ci"), $"DisplayName", $"Location", $"StartDate", $"EndDate") + .select($"ConferenceInstanceId".as("ci"), $"DisplayName", $"Location", $"StartDate",$"EndDate" ) val conferenceInstance = conference.joinWith(papers, papers("ConferenceInstanceId").equalTo(conference("ci"))) - .select($"_1.ci", $"_1.DisplayName", $"_1.Location", $"_1.StartDate", $"_1.EndDate", $"_2.PaperId").as[MagConferenceInstance] + .select($"_1.ci", $"_1.DisplayName", $"_1.Location", $"_1.StartDate",$"_1.EndDate", $"_2.PaperId").as[MagConferenceInstance] magPubs.joinWith(conferenceInstance, col("_1").equalTo(conferenceInstance("PaperId")), "left") @@ -119,7 +122,7 @@ object SparkProcessMAG { magPubs.joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left") .map(item => ConversionUtil.updatePubsWithDescription(item) - ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4") + ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4") logger.info("Phase 7) Enrich Publication with FieldOfStudy") @@ -145,10 +148,11 @@ object SparkProcessMAG { spark.read.load(s"$workingPath/mag_publication").as[Publication] .filter(p => p.getId == null) .groupByKey(p => p.getId) - .reduceGroups((a: Publication, b: Publication) => ConversionUtil.mergePublication(a, b)) + .reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) .map(_._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") + } } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala index 11031f9ca..1cd3f7028 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala @@ -4,16 +4,17 @@ import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication} +import eu.dnetlib.dhp.schema.orcid.{AuthorData, OrcidDOI} import eu.dnetlib.doiboost.DoiBoostMappingUtil import eu.dnetlib.doiboost.DoiBoostMappingUtil.{createSP, generateDataInfo} import org.apache.commons.lang.StringUtils +import org.slf4j.{Logger, LoggerFactory} + +import scala.collection.JavaConverters._ import org.json4s import org.json4s.DefaultFormats import org.json4s.JsonAST._ import org.json4s.jackson.JsonMethods._ -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.JavaConverters._ case class ORCIDItem(doi:String, authors:List[OrcidAuthor]){} diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala similarity index 84% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala index 1b189e296..fa4a93e00 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala @@ -11,10 +11,10 @@ object SparkConvertORCIDToOAF { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - def run(spark: SparkSession, workingPath: String, targetPath: String): Unit = { + def run(spark:SparkSession, workingPath:String, targetPath:String) :Unit = { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] import spark.implicits._ - val dataset: Dataset[ORCIDItem] = spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] + val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] logger.info("Converting ORCID to OAF") dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) @@ -35,8 +35,8 @@ object SparkConvertORCIDToOAF { val workingPath = parser.get("workingPath") val targetPath = parser.get("targetPath") - run(spark, workingPath, targetPath) + run(spark,workingPath, targetPath) } -} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala similarity index 67% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala index 153be5dd1..31f331912 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala @@ -1,45 +1,48 @@ package eu.dnetlib.doiboost.orcid +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.merge.AuthorMerger import eu.dnetlib.dhp.schema.oaf.Publication +import eu.dnetlib.dhp.schema.orcid.OrcidDOI import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD -import org.apache.spark.sql.functions.{col, collect_list} -import org.apache.spark.sql._ +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkPreprocessORCID { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - def fixORCIDItem(item: ORCIDItem): ORCIDItem = { - ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) + def fixORCIDItem(item :ORCIDItem):ORCIDItem = { + ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) } - def run(spark: SparkSession, sourcePath: String, workingPath: String): Unit = { + def run(spark:SparkSession,sourcePath:String,workingPath:String):Unit = { import spark.implicits._ implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - val inputRDD: RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s != null).filter(s => ORCIDToOAF.authorValid(s)) + val inputRDD:RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s!= null).filter(s => ORCIDToOAF.authorValid(s)) spark.createDataset(inputRDD).as[OrcidAuthor].write.mode(SaveMode.Overwrite).save(s"$workingPath/author") - val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s != null) + val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s!= null) spark.createDataset(res).as[OrcidWork].write.mode(SaveMode.Overwrite).save(s"$workingPath/works") - val authors: Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] + val authors :Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] - val works: Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] + val works :Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] works.joinWith(authors, authors("oid").equalTo(works("oid"))) - .map(i => { + .map(i =>{ val doi = i._1.doi val author = i._2 - (doi, author) - }).groupBy(col("_1").alias("doi")) + (doi, author) + }).groupBy(col("_1").alias("doi")) .agg(collect_list(col("_2")).alias("authors")).as[ORCIDItem] .map(s => fixORCIDItem(s)) .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor") @@ -64,4 +67,4 @@ object SparkPreprocessORCID { } -} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala similarity index 80% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala index 70290018d..4530926f1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala @@ -1,14 +1,16 @@ package eu.dnetlib.doiboost.uw import eu.dnetlib.dhp.application.ArgumentApplicationParser + import eu.dnetlib.dhp.schema.oaf.Publication import eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD -import org.apache.spark.sql._ +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} + object SparkMapUnpayWallToOAF { def main(args: Array[String]): Unit = { @@ -30,11 +32,11 @@ object SparkMapUnpayWallToOAF { val sourcePath = parser.get("sourcePath") val targetPath = parser.get("targetPath") - val inputRDD: RDD[String] = spark.sparkContext.textFile(s"$sourcePath") + val inputRDD:RDD[String] = spark.sparkContext.textFile(s"$sourcePath") logger.info("Converting UnpayWall to OAF") - val d: Dataset[Publication] = spark.createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p => p != null)).as[Publication] + val d:Dataset[Publication] = spark.createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p=>p!=null)).as[Publication] d.write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala index bf5694965..c8324cde1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala @@ -4,13 +4,14 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{AccessRight, Instance, OpenAccessRoute, Publication} import eu.dnetlib.doiboost.DoiBoostMappingUtil -import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ +import eu.dnetlib.doiboost.DoiBoostMappingUtil._ +import eu.dnetlib.doiboost.uw.UnpayWallToOAF.get_unpaywall_color diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/DoiBoostHostedByMapTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/DoiBoostHostedByMapTest.scala rename to dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala index 049ac37f4..4912648be 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/DoiBoostHostedByMapTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala @@ -1,4 +1,4 @@ -package eu.dnetlib.doiboost +package eu.dnetlib.dhp.doiboost import eu.dnetlib.dhp.schema.oaf.{Publication, Dataset => OafDataset} import eu.dnetlib.doiboost.{DoiBoostMappingUtil, HostedByItemType} diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/NormalizeDoiTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala similarity index 96% rename from dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/NormalizeDoiTest.scala rename to dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala index bdf845f19..a9a841ee9 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/NormalizeDoiTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala @@ -1,4 +1,4 @@ -package eu.dnetlib.doiboost +package eu.dnetlib.dhp.doiboost import eu.dnetlib.doiboost.DoiBoostMappingUtil import org.junit.jupiter.api.Test diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/mag/MAGMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/mag/MAGMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala index 7403e103e..46d4ec08d 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/mag/MAGMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala @@ -3,9 +3,9 @@ package eu.dnetlib.doiboost.mag import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, SparkSession} import org.codehaus.jackson.map.ObjectMapper -import org.json4s.DefaultFormats import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test +import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} import java.sql.Timestamp diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala similarity index 99% rename from dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala rename to dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala index a5ce6296c..b484dc087 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -10,9 +10,10 @@ import org.junit.jupiter.api.io.TempDir import org.slf4j.{Logger, LoggerFactory} import java.nio.file.Path -import scala.collection.JavaConversions._ import scala.io.Source +import scala.collection.JavaConversions._ + class MappingORCIDToOAFTest { val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass) val mapper = new ObjectMapper() diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala index 012ed3da0..fa696fffc 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala @@ -3,11 +3,11 @@ package eu.dnetlib.doiboost.uw import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.oaf.OpenAccessRoute -import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test -import org.slf4j.{Logger, LoggerFactory} import scala.io.Source +import org.junit.jupiter.api.Assertions._ +import org.slf4j.{Logger, LoggerFactory} class UnpayWallMappingTest { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index ad4e1c96e..ce383292c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -1,8 +1,8 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo -import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} +import org.apache.spark.sql.expressions.Aggregator case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala similarity index 81% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index 38af3eee4..1b18ba3ae 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -2,12 +2,13 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult.{applyHBtoPubs, getClass} import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.Datasource +import eu.dnetlib.dhp.schema.oaf.{Datasource, Publication} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql._ +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} @@ -51,18 +52,18 @@ object SparkApplyHostedByMapToDatasource { val mapper = new ObjectMapper() - val dats: Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") + val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") .map(r => mapper.readValue(r, classOf[Datasource])) - val pinfo: Dataset[EntityInfo] = Aggregators.datasourceToSingleId(spark.read.textFile(preparedInfoPath) + val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) - applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) + applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) spark.read.textFile(outputPath) .write .mode(SaveMode.Overwrite) - .option("compression", "gzip") + .option("compression","gzip") .text(graphPath + "/datasource") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala similarity index 85% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index d360da2e9..0e047d016 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -5,13 +5,16 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils -import eu.dnetlib.dhp.schema.oaf.{Instance, OpenAccessRoute, Publication} +import eu.dnetlib.dhp.schema.oaf.{Datasource, Instance, OpenAccessRoute, Publication} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql._ +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} + import scala.collection.JavaConverters._ + + object SparkApplyHostedByMapToResult { def applyHBtoPubs(join: Dataset[EntityInfo], pubs: Dataset[Publication]) = { @@ -36,7 +39,6 @@ object SparkApplyHostedByMapToResult { p })(Encoders.bean(classOf[Publication])) } - def main(args: Array[String]): Unit = { @@ -65,18 +67,18 @@ object SparkApplyHostedByMapToResult { implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) val mapper = new ObjectMapper() - val pubs: Dataset[Publication] = spark.read.textFile(graphPath + "/publication") + val pubs : Dataset[Publication] = spark.read.textFile(graphPath + "/publication") .map(r => mapper.readValue(r, classOf[Publication])) - val pinfo: Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) - .map(ei => mapper.readValue(ei, classOf[EntityInfo])) + val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) + .map(ei => mapper.readValue(ei, classOf[EntityInfo])) - applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) + applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) spark.read.textFile(outputPath) .write .mode(SaveMode.Overwrite) - .option("compression", "gzip") + .option("compression","gzip") .text(graphPath + "/publication") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala similarity index 74% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index 87e203e4b..b7a7d352f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -3,58 +3,61 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo + import eu.dnetlib.dhp.schema.oaf.{Journal, Publication} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql._ +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} + + object SparkPrepareHostedByInfoToApply { implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - def getList(id: String, j: Journal, name: String): List[EntityInfo] = { - var lst: List[EntityInfo] = List() + def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { + var lst:List[EntityInfo] = List() - if (j.getIssnLinking != null && !j.getIssnLinking.equals("")) { + if (j.getIssnLinking != null && !j.getIssnLinking.equals("")){ lst = EntityInfo.newInstance(id, j.getIssnLinking, name) :: lst } - if (j.getIssnOnline != null && !j.getIssnOnline.equals("")) { + if (j.getIssnOnline != null && !j.getIssnOnline.equals("")){ lst = EntityInfo.newInstance(id, j.getIssnOnline, name) :: lst } - if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")) { + if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")){ lst = EntityInfo.newInstance(id, j.getIssnPrinted, name) :: lst } lst } - def prepareResultInfo(spark: SparkSession, publicationPath: String): Dataset[EntityInfo] = { + def prepareResultInfo(spark:SparkSession, publicationPath:String) : Dataset[EntityInfo] = { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) val mapper = new ObjectMapper() - val dd: Dataset[Publication] = spark.read.textFile(publicationPath) + val dd : Dataset[Publication] = spark.read.textFile(publicationPath) .map(r => mapper.readValue(r, classOf[Publication])) - dd.filter(p => p.getJournal != null).flatMap(p => getList(p.getId, p.getJournal, "")) + dd.filter(p => p.getJournal != null ).flatMap(p => getList(p.getId, p.getJournal, "")) } - def toEntityInfo(input: String): EntityInfo = { + def toEntityInfo(input:String): EntityInfo = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val c: Map[String, HostedByItemType] = json.extract[Map[String, HostedByItemType]] + val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]] toEntityItem(c.keys.head, c.values.head) } - def toEntityItem(journal_id: String, hbi: HostedByItemType): EntityInfo = { + def toEntityItem(journal_id: String , hbi: HostedByItemType): EntityInfo = { EntityInfo.newInstance(hbi.id, journal_id, hbi.officialname, hbi.openAccess) @@ -64,7 +67,7 @@ object SparkPrepareHostedByInfoToApply { Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journalId").equalTo(hbm.col("journalId")), "left") .map(t2 => { val res: EntityInfo = t2._1 - if (t2._2 != null) { + if(t2._2 != null ){ val ds = t2._2 res.setHostedById(ds.getId) res.setOpenAccess(ds.getOpenAccess) @@ -104,10 +107,10 @@ object SparkPrepareHostedByInfoToApply { //STEP1: read the hostedbymap and transform it in EntityInfo - val hostedByInfo: Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) + val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) - //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) - val resultInfoDataset: Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") + //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) + val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") //STEP3: left join resultInfo with hostedByInfo on journal_id. Reduction of all the results with the same id in just //one entry (one result could be associated to issn and eissn and so possivly matching more than once against the map) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala similarity index 61% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index 6dfe35623..1ee1d5d1a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -1,39 +1,41 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap -import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel} import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.commons.io.IOUtils -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileSystem, Path} -import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} - +import com.fasterxml.jackson.databind.ObjectMapper +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path import java.io.PrintWriter +import org.apache.hadoop.io.compress.GzipCodec + + object SparkProduceHostedByMap { implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) - def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)): HostedByItemType = { + def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = { val openaire: HostedByInfo = input._1._1 val doaj: HostedByInfo = input._1._2 val gold: HostedByInfo = input._2 val isOpenAccess: Boolean = doaj == null && gold == null openaire.journal_id match { - case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) - case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) - case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) + case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) + case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) + case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) // catch the default with a variable so you can print it - case whoa => null + case whoa => null } } @@ -42,7 +44,7 @@ object SparkProduceHostedByMap { implicit val formats = org.json4s.DefaultFormats - val map: Map[String, HostedByItemType] = Map(input._1 -> input._2) + val map: Map [String, HostedByItemType] = Map (input._1 -> input._2 ) Serialization.write(map) @@ -50,33 +52,34 @@ object SparkProduceHostedByMap { } - def getHostedByItemType(id: String, officialname: String, issn: String, eissn: String, issnl: String, oa: Boolean): HostedByItemType = { - if (issn != null) { - if (eissn != null) { - if (issnl != null) { - HostedByItemType(id, officialname, issn, eissn, issnl, oa) - } else { - HostedByItemType(id, officialname, issn, eissn, "", oa) + + def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { + if(issn != null){ + if(eissn != null){ + if(issnl != null){ + HostedByItemType(id, officialname, issn, eissn, issnl , oa) + }else{ + HostedByItemType(id, officialname, issn, eissn, "" , oa) } - } else { - if (issnl != null) { - HostedByItemType(id, officialname, issn, "", issnl, oa) - } else { - HostedByItemType(id, officialname, issn, "", "", oa) + }else{ + if(issnl != null){ + HostedByItemType(id, officialname, issn, "", issnl , oa) + }else{ + HostedByItemType(id, officialname, issn, "", "" , oa) } } - } else { - if (eissn != null) { - if (issnl != null) { - HostedByItemType(id, officialname, "", eissn, issnl, oa) - } else { - HostedByItemType(id, officialname, "", eissn, "", oa) + }else{ + if(eissn != null){ + if(issnl != null){ + HostedByItemType(id, officialname, "", eissn, issnl , oa) + }else{ + HostedByItemType(id, officialname, "", eissn, "" , oa) } - } else { - if (issnl != null) { - HostedByItemType(id, officialname, "", "", issnl, oa) - } else { - HostedByItemType("", "", "", "", "", oa) + }else{ + if(issnl != null){ + HostedByItemType(id, officialname, "", "", issnl , oa) + }else{ + HostedByItemType("", "", "", "", "" , oa) } } } @@ -87,10 +90,10 @@ object SparkProduceHostedByMap { return getHostedByItemType(dats.getId, dats.getOfficialname.getValue, dats.getJournal.getIssnPrinted, dats.getJournal.getIssnOnline, dats.getJournal.getIssnLinking, false) } - HostedByItemType("", "", "", "", "", false) + HostedByItemType("","","","","",false) } - def oaHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { + def oaHostedByDataset(spark:SparkSession, datasourcePath : String) : Dataset[HostedByItemType] = { import spark.implicits._ @@ -99,10 +102,10 @@ object SparkProduceHostedByMap { implicit var encoderD = Encoders.kryo[Datasource] - val dd: Dataset[Datasource] = spark.read.textFile(datasourcePath) + val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[Datasource])) - dd.map { ddt => oaToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) + dd.map{ddt => oaToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) } @@ -112,17 +115,17 @@ object SparkProduceHostedByMap { } - def goldHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { + def goldHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { import spark.implicits._ implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] val mapper = new ObjectMapper() - val dd: Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath) + val dd : Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[UnibiGoldModel])) - dd.map { ddt => goldToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) + dd.map{ddt => goldToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) } @@ -131,40 +134,41 @@ object SparkProduceHostedByMap { return getHostedByItemType(Constants.DOAJ, doaj.getJournalTitle, doaj.getIssn, doaj.getEissn, "", true) } - def doajHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { + def doajHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { import spark.implicits._ implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] val mapper = new ObjectMapper() - val dd: Dataset[DOAJModel] = spark.read.textFile(datasourcePath) + val dd : Dataset[DOAJModel] = spark.read.textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[DOAJModel])) - dd.map { ddt => doajToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) + dd.map{ddt => doajToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) } def toList(input: HostedByItemType): List[(String, HostedByItemType)] = { - var lst: List[(String, HostedByItemType)] = List() - if (!input.issn.equals("")) { + var lst : List[(String, HostedByItemType)] = List() + if(!input.issn.equals("")){ lst = (input.issn, input) :: lst } - if (!input.eissn.equals("")) { + if(!input.eissn.equals("")){ lst = (input.eissn, input) :: lst } - if (!input.lissn.equals("")) { + if(!input.lissn.equals("")){ lst = (input.lissn, input) :: lst } lst } - def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode: String): Unit = { + + def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode : String):Unit = { val conf = new Configuration() conf.set("fs.defaultFS", hdfsNameNode) - val fs = FileSystem.get(conf) + val fs= FileSystem.get(conf) val output = fs.create(new Path(outputPath)) val writer = new PrintWriter(output) try { @@ -178,6 +182,7 @@ object SparkProduceHostedByMap { } + def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) @@ -208,7 +213,7 @@ object SparkProduceHostedByMap { .union(doajHostedByDataset(spark, workingDirPath + "/doaj.json")) .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) .map(hbi => toHostedByMap(hbi))(Encoders.STRING) - .rdd.saveAsTextFile(outputPath, classOf[GzipCodec]) + .rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala similarity index 88% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala index 0179cc266..c7ad1890d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala @@ -4,14 +4,20 @@ import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.schema.common.ModelSupport +import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.utils.DHPUtils +import org.apache.commons.io.IOUtils +import org.apache.commons.lang3.StringUtils +import org.apache.http.client.methods.HttpGet +import org.apache.http.impl.client.HttpClients import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.apache.spark.{SparkConf, SparkContext} import org.slf4j.LoggerFactory -import scala.io.Source import scala.collection.JavaConverters._ +import scala.io.Source + object CopyHdfsOafSparkApplication { def main(args: Array[String]): Unit = { @@ -53,7 +59,7 @@ object CopyHdfsOafSparkApplication { if (validPaths.nonEmpty) { val oaf = spark.read.load(validPaths: _*).as[Oaf] val mapper = new ObjectMapper() - val l = ModelSupport.oafTypes.entrySet.asScala.map(e => e.getKey).toList + val l =ModelSupport.oafTypes.entrySet.asScala.map(e => e.getKey).toList l.foreach( e => oaf.filter(o => o.getClass.getSimpleName.equalsIgnoreCase(e)) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala similarity index 79% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala index 6b4a501d6..316b8afed 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.schema.common.EntityType import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils @@ -13,7 +14,7 @@ import org.slf4j.{Logger, LoggerFactory} object SparkResolveEntities { val mapper = new ObjectMapper() - val entities = List(EntityType.dataset, EntityType.publication, EntityType.software, EntityType.otherresearchproduct) + val entities = List(EntityType.dataset,EntityType.publication, EntityType.software, EntityType.otherresearchproduct) def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) @@ -50,10 +51,10 @@ object SparkResolveEntities { fs.rename(new Path(s"$workingPath/resolvedGraph/$e"), new Path(s"$graphBasePath/$e")) } - } +} - def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { +def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ @@ -70,22 +71,22 @@ object SparkResolveEntities { } - def deserializeObject(input: String, entity: EntityType): Result = { + def deserializeObject(input:String, entity:EntityType ) :Result = { - entity match { - case EntityType.publication => mapper.readValue(input, classOf[Publication]) - case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) - case EntityType.software => mapper.readValue(input, classOf[Software]) - case EntityType.otherresearchproduct => mapper.readValue(input, classOf[OtherResearchProduct]) - } + entity match { + case EntityType.publication => mapper.readValue(input, classOf[Publication]) + case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) + case EntityType.software=> mapper.readValue(input, classOf[Software]) + case EntityType.otherresearchproduct=> mapper.readValue(input, classOf[OtherResearchProduct]) + } } - def generateResolvedEntities(spark: SparkSession, workingPath: String, graphBasePath: String) = { + def generateResolvedEntities(spark:SparkSession, workingPath: String, graphBasePath:String) = { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ - val re: Dataset[Result] = spark.read.load(s"$workingPath/resolvedEntities").as[Result] + val re:Dataset[Result] = spark.read.load(s"$workingPath/resolvedEntities").as[Result] entities.foreach { e => diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index c7f9b2d0e..cd517dd5e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.HdfsSupport -import eu.dnetlib.dhp.schema.oaf.Relation +import eu.dnetlib.dhp.schema.oaf.{Relation, Result} import eu.dnetlib.dhp.utils.DHPUtils import org.apache.commons.io.IOUtils import org.apache.hadoop.fs.{FileSystem, Path} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala new file mode 100644 index 000000000..9e905d806 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala @@ -0,0 +1,31 @@ +package eu.dnetlib.dhp.oa.sx.graphimport + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession + +object SparkDataciteToOAF { + + + def main(args: Array[String]): Unit = { + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/ebi/datacite_to_df_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + import spark.implicits._ + + + val sc = spark.sparkContext + + val inputPath = parser.get("inputPath") + + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala similarity index 69% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala index 9d16cf907..3ee0c7dd6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.Result +import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf @@ -29,13 +29,13 @@ object SparkConvertDatasetToJsonRDD { val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - val resultObject = List("publication", "dataset", "software", "otherResearchProduct") + val resultObject = List("publication","dataset","software", "otherResearchProduct") val mapper = new ObjectMapper() - implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) - resultObject.foreach { item => - spark.read.load(s"$sourcePath/$item").as[Result].map(r => mapper.writeValueAsString(r))(Encoders.STRING).rdd.saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) + resultObject.foreach{item => + spark.read.load(s"$sourcePath/$item").as[Result].map(r=> mapper.writeValueAsString(r))(Encoders.STRING).rdd.saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala similarity index 83% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala index cc1b97fd6..846ac37af 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala @@ -5,10 +5,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.sx.scholix.Scholix import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import org.apache.commons.io.IOUtils -import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} +import org.apache.hadoop.io.compress._ object SparkConvertObjectToJson { @@ -32,8 +32,8 @@ object SparkConvertObjectToJson { log.info(s"objectType -> $objectType") - implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] - implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] + implicit val scholixEncoder :Encoder[Scholix]= Encoders.kryo[Scholix] + implicit val summaryEncoder :Encoder[ScholixSummary]= Encoders.kryo[ScholixSummary] val mapper = new ObjectMapper @@ -42,11 +42,11 @@ object SparkConvertObjectToJson { case "scholix" => log.info("Serialize Scholix") val d: Dataset[Scholix] = spark.read.load(sourcePath).as[Scholix] - d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(6000).saveAsTextFile(targetPath, classOf[GzipCodec]) + d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(6000).saveAsTextFile(targetPath, classOf[GzipCodec]) case "summary" => log.info("Serialize Summary") val d: Dataset[ScholixSummary] = spark.read.load(sourcePath).as[ScholixSummary] - d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(1000).saveAsTextFile(targetPath, classOf[GzipCodec]) + d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(1000).saveAsTextFile(targetPath, classOf[GzipCodec]) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala similarity index 62% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 2eb5e3a35..4b82fe645 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -2,12 +2,11 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} - object SparkConvertRDDtoDataset { def main(args: Array[String]): Unit = { @@ -32,39 +31,39 @@ object SparkConvertRDDtoDataset { val entityPath = s"$t/entities" val relPath = s"$t/relation" val mapper = new ObjectMapper() - implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) - implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) - implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) - implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) - implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) + implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) + implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) + implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) + implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) + implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) log.info("Converting dataset") - val rddDataset = spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])) + val rddDataset =spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])) spark.createDataset(rddDataset).as[OafDataset].write.mode(SaveMode.Overwrite).save(s"$entityPath/dataset") log.info("Converting publication") - val rddPublication = spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])) + val rddPublication =spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])) spark.createDataset(rddPublication).as[Publication].write.mode(SaveMode.Overwrite).save(s"$entityPath/publication") log.info("Converting software") - val rddSoftware = spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])) + val rddSoftware =spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])) spark.createDataset(rddSoftware).as[Software].write.mode(SaveMode.Overwrite).save(s"$entityPath/software") log.info("Converting otherresearchproduct") - val rddOtherResearchProduct = spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])) + val rddOtherResearchProduct =spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])) spark.createDataset(rddOtherResearchProduct).as[OtherResearchProduct].write.mode(SaveMode.Overwrite).save(s"$entityPath/otherresearchproduct") log.info("Converting Relation") - val relationSemanticFilter = List("cites", "iscitedby", "merges", "ismergedin") + val relationSemanticFilter = List("cites", "iscitedby","merges", "ismergedin") - val rddRelation = spark.sparkContext.textFile(s"$sourcePath/relation") + val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation") .map(s => mapper.readValue(s, classOf[Relation])) - .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) + .filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50")) .filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala index b6f678967..350b00c5e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala @@ -1,12 +1,14 @@ package eu.dnetlib.dhp.sx.graph import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _} +import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql._ +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} + + object SparkCreateInputGraph { def main(args: Array[String]): Unit = { @@ -31,7 +33,7 @@ object SparkCreateInputGraph { ) - implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) @@ -39,13 +41,16 @@ object SparkCreateInputGraph { implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) + + + val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - val oafDs: Dataset[Oaf] = spark.read.load(s"$sourcePath/*").as[Oaf] + val oafDs:Dataset[Oaf] = spark.read.load(s"$sourcePath/*").as[Oaf] log.info("Extract Publication") @@ -65,27 +70,27 @@ object SparkCreateInputGraph { resultObject.foreach { r => log.info(s"Make ${r._1} unique") - makeDatasetUnique(s"$targetPath/extracted/${r._1}", s"$targetPath/preprocess/${r._1}", spark, r._2) + makeDatasetUnique(s"$targetPath/extracted/${r._1}",s"$targetPath/preprocess/${r._1}",spark, r._2) } } - def extractEntities[T <: Oaf](oafDs: Dataset[Oaf], targetPath: String, clazz: Class[T], log: Logger): Unit = { + def extractEntities[T <: Oaf ](oafDs:Dataset[Oaf], targetPath:String, clazz:Class[T], log:Logger) :Unit = { - implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) + implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) log.info(s"Extract ${clazz.getSimpleName}") oafDs.filter(o => o.isInstanceOf[T]).map(p => p.asInstanceOf[T]).write.mode(SaveMode.Overwrite).save(targetPath) } - def makeDatasetUnique[T <: Result](sourcePath: String, targetPath: String, spark: SparkSession, clazz: Class[T]): Unit = { + def makeDatasetUnique[T <: Result ](sourcePath:String, targetPath:String, spark:SparkSession, clazz:Class[T]) :Unit = { import spark.implicits._ - implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) + implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) - val ds: Dataset[T] = spark.read.load(sourcePath).as[T] + val ds:Dataset[T] = spark.read.load(sourcePath).as[T] - ds.groupByKey(_.getId).reduceGroups { (x, y) => + ds.groupByKey(_.getId).reduceGroups{(x,y) => x.mergeFrom(y) x }.map(_._2).write.mode(SaveMode.Overwrite).save(targetPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala index 9930c57af..e4fcd2782 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala @@ -9,7 +9,7 @@ import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils.RelatedEntities import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.functions.count -import org.apache.spark.sql._ +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkCreateScholix { @@ -42,7 +42,7 @@ object SparkCreateScholix { val relationDS: Dataset[(String, Relation)] = spark.read.load(relationPath).as[Relation] - .filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + .filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder)) val summaryDS: Dataset[(String, ScholixSummary)] = spark.read.load(summaryPath).as[ScholixSummary] @@ -51,54 +51,54 @@ object SparkCreateScholix { relationDS.joinWith(summaryDS, relationDS("_1").equalTo(summaryDS("_1")), "left") .map { input: ((String, Relation), (String, ScholixSummary)) => - if (input._1 != null && input._2 != null) { + if (input._1!= null && input._2!= null) { val rel: Relation = input._1._2 val source: ScholixSummary = input._2._2 (rel.getTarget, ScholixUtils.scholixFromSource(rel, source)) } - else null + else null }(Encoders.tuple(Encoders.STRING, scholixEncoder)) - .filter(r => r != null) + .filter(r => r!= null) .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_from_source") val scholixSource: Dataset[(String, Scholix)] = spark.read.load(s"$targetPath/scholix_from_source").as[(String, Scholix)](Encoders.tuple(Encoders.STRING, scholixEncoder)) scholixSource.joinWith(summaryDS, scholixSource("_1").equalTo(summaryDS("_1")), "left") .map { input: ((String, Scholix), (String, ScholixSummary)) => - if (input._2 == null) { + if (input._2== null) { null } else { val s: Scholix = input._1._2 val target: ScholixSummary = input._2._2 ScholixUtils.generateCompleteScholix(s, target) } - }.filter(s => s != null).write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_one_verse") + }.filter(s => s!= null).write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_one_verse") val scholix_o_v: Dataset[Scholix] = spark.read.load(s"$targetPath/scholix_one_verse").as[Scholix] scholix_o_v.flatMap(s => List(s, ScholixUtils.createInverseScholixRelation(s))).as[Scholix] - .map(s => (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, scholixEncoder)) + .map(s=> (s.getIdentifier,s))(Encoders.tuple(Encoders.STRING, scholixEncoder)) .groupByKey(_._1) .agg(ScholixUtils.scholixAggregator.toColumn) .map(s => s._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix") - val scholix_final: Dataset[Scholix] = spark.read.load(s"$targetPath/scholix").as[Scholix] + val scholix_final:Dataset[Scholix] = spark.read.load(s"$targetPath/scholix").as[Scholix] - val stats: Dataset[(String, String, Long)] = scholix_final.map(s => (s.getSource.getDnetIdentifier, s.getTarget.getObjectType)).groupBy("_1", "_2").agg(count("_1")).as[(String, String, Long)] + val stats:Dataset[(String,String,Long)]= scholix_final.map(s => (s.getSource.getDnetIdentifier, s.getTarget.getObjectType)).groupBy("_1", "_2").agg(count("_1")).as[(String,String,Long)] stats - .map(s => RelatedEntities(s._1, if ("dataset".equalsIgnoreCase(s._2)) s._3 else 0, if ("publication".equalsIgnoreCase(s._2)) s._3 else 0)) + .map(s => RelatedEntities(s._1, if ("dataset".equalsIgnoreCase(s._2)) s._3 else 0, if ("publication".equalsIgnoreCase(s._2)) s._3 else 0 )) .groupByKey(_.id) - .reduceGroups((a, b) => RelatedEntities(a.id, a.relatedDataset + b.relatedDataset, a.relatedPublication + b.relatedPublication)) + .reduceGroups((a, b) => RelatedEntities(a.id, a.relatedDataset+b.relatedDataset, a.relatedPublication+b.relatedPublication)) .map(_._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/related_entities") - val relatedEntitiesDS: Dataset[RelatedEntities] = spark.read.load(s"$targetPath/related_entities").as[RelatedEntities].filter(r => r.relatedPublication > 0 || r.relatedDataset > 0) + val relatedEntitiesDS:Dataset[RelatedEntities] = spark.read.load(s"$targetPath/related_entities").as[RelatedEntities].filter(r => r.relatedPublication>0 || r.relatedDataset > 0) - relatedEntitiesDS.joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner").map { i => + relatedEntitiesDS.joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner").map{i => val re = i._1 val sum = i._2._2 diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala similarity index 68% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala index 4274cae5a..0970375f5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala @@ -6,7 +6,7 @@ import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql._ +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkCreateSummaryObject { @@ -28,15 +28,15 @@ object SparkCreateSummaryObject { val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - implicit val resultEncoder: Encoder[Result] = Encoders.kryo[Result] - implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result] + implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] + implicit val summaryEncoder:Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] - val ds: Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result].filter(r => r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) + val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result].filter(r=>r.getDataInfo== null || r.getDataInfo.getDeletedbyinference== false) - ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s != null).write.mode(SaveMode.Overwrite).save(targetPath) + ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s!= null).write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala index c70397d04..193512474 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala @@ -5,7 +5,6 @@ import org.apache.spark.sql.{Encoder, Encoders} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse - import java.util.regex.Pattern import scala.language.postfixOps import scala.xml.{Elem, Node, XML} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala similarity index 83% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala index f1a4553ea..79c75d6df 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala @@ -2,12 +2,13 @@ package eu.dnetlib.dhp.sx.graph.pangaea import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} -import scala.io.Source import scala.collection.JavaConverters._ +import scala.io.Source + object SparkGeneratePanagaeaDataset { @@ -27,17 +28,17 @@ object SparkGeneratePanagaeaDataset { parser.getObjectMap.asScala.foreach(s => logger.info(s"${s._1} -> ${s._2}")) logger.info("Converting sequential file into Dataset") - val sc: SparkContext = spark.sparkContext + val sc:SparkContext = spark.sparkContext - val workingPath: String = parser.get("workingPath") + val workingPath:String = parser.get("workingPath") implicit val pangaeaEncoders: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel] - val inputRDD: RDD[PangaeaDataModel] = sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s)) + val inputRDD:RDD[PangaeaDataModel] = sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s)) spark.createDataset(inputRDD).as[PangaeaDataModel] - .map(s => (s.identifier, s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders)) - .groupByKey(_._1)(Encoders.STRING) + .map(s => (s.identifier,s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders)) + .groupByKey(_._1)(Encoders.STRING) .agg(PangaeaUtils.getDatasetAggregator().toColumn) .map(s => s._2) .write.mode(SaveMode.Overwrite).save(s"$workingPath/dataset") @@ -45,4 +46,7 @@ object SparkGeneratePanagaeaDataset { } + + + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala similarity index 61% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala index 7b1ddbb8f..93c554e04 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala @@ -1,5 +1,6 @@ package eu.dnetlib.dhp.sx.graph.scholix + import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Result, StructuredProperty} import eu.dnetlib.dhp.schema.sx.scholix._ import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, Typology} @@ -10,23 +11,22 @@ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse -import scala.io.Source import scala.collection.JavaConverters._ - +import scala.io.Source +import scala.language.postfixOps object ScholixUtils { val DNET_IDENTIFIER_SCHEMA: String = "DNET Identifier" - val DATE_RELATION_KEY: String = "RelationDate" + val DATE_RELATION_KEY:String = "RelationDate" + case class RelationVocabulary(original:String, inverse:String){} - case class RelationVocabulary(original: String, inverse: String) {} + case class RelatedEntities(id:String, relatedDataset:Long, relatedPublication:Long){} - case class RelatedEntities(id: String, relatedDataset: Long, relatedPublication: Long) {} - - val relations: Map[String, RelationVocabulary] = { - val input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relations.json")).mkString + val relations:Map[String, RelationVocabulary] = { + val input =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relations.json")).mkString implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) @@ -35,12 +35,12 @@ object ScholixUtils { } - def extractRelationDate(relation: Relation): String = { + def extractRelationDate(relation: Relation):String = { - if (relation.getProperties == null || !relation.getProperties.isEmpty) + if (relation.getProperties== null || !relation.getProperties.isEmpty) null else { - val date = relation.getProperties.asScala.find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey)).map(p => p.getValue) + val date =relation.getProperties.asScala.find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey)).map(p => p.getValue) if (date.isDefined) date.get else @@ -48,9 +48,9 @@ object ScholixUtils { } } - def extractRelationDate(summary: ScholixSummary): String = { + def extractRelationDate(summary: ScholixSummary):String = { - if (summary.getDate == null || summary.getDate.isEmpty) + if(summary.getDate== null || summary.getDate.isEmpty) null else { summary.getDate.get(0) @@ -59,14 +59,15 @@ object ScholixUtils { } - def inverseRelationShip(rel: ScholixRelationship): ScholixRelationship = { + def inverseRelationShip(rel:ScholixRelationship):ScholixRelationship = { new ScholixRelationship(rel.getInverse, rel.getSchema, rel.getName) } - val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] = new Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] with Serializable { + + val statsAggregator:Aggregator[(String,String, Long), RelatedEntities, RelatedEntities] = new Aggregator[(String,String, Long), RelatedEntities, RelatedEntities] with Serializable { override def zero: RelatedEntities = null override def reduce(b: RelatedEntities, a: (String, String, Long)): RelatedEntities = { @@ -77,16 +78,17 @@ object ScholixUtils { if (b == null) RelatedEntities(a._1, relatedDataset, relatedPublication) else - RelatedEntities(a._1, b.relatedDataset + relatedDataset, b.relatedPublication + relatedPublication) + RelatedEntities(a._1,b.relatedDataset+ relatedDataset, b.relatedPublication+ relatedPublication ) } override def merge(b1: RelatedEntities, b2: RelatedEntities): RelatedEntities = { - if (b1 != null && b2 != null) - RelatedEntities(b1.id, b1.relatedDataset + b2.relatedDataset, b1.relatedPublication + b2.relatedPublication) + if (b1!= null && b2!= null) + RelatedEntities(b1.id, b1.relatedDataset+ b2.relatedDataset, b1.relatedPublication+ b2.relatedPublication) - else if (b1 != null) - b1 else + if (b1!= null) + b1 + else b2 } @@ -102,12 +104,12 @@ object ScholixUtils { override def zero: Scholix = null - def scholix_complete(s: Scholix): Boolean = { - if (s == null || s.getIdentifier == null) { + def scholix_complete(s:Scholix):Boolean ={ + if (s== null || s.getIdentifier==null) { false } else if (s.getSource == null || s.getTarget == null) { - false - } + false + } else if (s.getLinkprovider == null || s.getLinkprovider.isEmpty) false else @@ -119,7 +121,7 @@ object ScholixUtils { } override def merge(b1: Scholix, b2: Scholix): Scholix = { - if (scholix_complete(b1)) b1 else b2 + if (scholix_complete(b1)) b1 else b2 } override def finish(reduction: Scholix): Scholix = reduction @@ -130,7 +132,7 @@ object ScholixUtils { } - def createInverseScholixRelation(scholix: Scholix): Scholix = { + def createInverseScholixRelation(scholix: Scholix):Scholix = { val s = new Scholix s.setPublicationDate(scholix.getPublicationDate) s.setPublisher(scholix.getPublisher) @@ -142,33 +144,34 @@ object ScholixUtils { s + } - def extractCollectedFrom(summary: ScholixSummary): List[ScholixEntityId] = { - if (summary.getDatasources != null && !summary.getDatasources.isEmpty) { - val l: List[ScholixEntityId] = summary.getDatasources.asScala.map { + def extractCollectedFrom(summary:ScholixSummary): List[ScholixEntityId] = { + if (summary.getDatasources!= null && !summary.getDatasources.isEmpty) { + val l: List[ScholixEntityId] = summary.getDatasources.asScala.map{ d => new ScholixEntityId(d.getDatasourceName, List(new ScholixIdentifier(d.getDatasourceId, "DNET Identifier", null)).asJava) }(collection.breakOut) - l + l } else List() } - def extractCollectedFrom(relation: Relation): List[ScholixEntityId] = { + def extractCollectedFrom(relation: Relation) : List[ScholixEntityId] = { if (relation.getCollectedfrom != null && !relation.getCollectedfrom.isEmpty) { val l: List[ScholixEntityId] = relation.getCollectedfrom.asScala.map { c => - new ScholixEntityId(c.getValue, List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA, null)).asJava) + new ScholixEntityId(c.getValue, List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA,null)).asJava) }(collection breakOut) l } else List() } - def generateCompleteScholix(scholix: Scholix, target: ScholixSummary): Scholix = { + def generateCompleteScholix(scholix: Scholix, target:ScholixSummary): Scholix = { val s = new Scholix s.setPublicationDate(scholix.getPublicationDate) s.setPublisher(scholix.getPublisher) @@ -189,28 +192,29 @@ object ScholixUtils { r.setObjectType(summaryObject.getTypology.toString) r.setObjectSubType(summaryObject.getSubType) - if (summaryObject.getTitle != null && !summaryObject.getTitle.isEmpty) - r.setTitle(summaryObject.getTitle.get(0)) + if (summaryObject.getTitle!= null && !summaryObject.getTitle.isEmpty) + r.setTitle(summaryObject.getTitle.get(0)) - if (summaryObject.getAuthor != null && !summaryObject.getAuthor.isEmpty) { - val l: List[ScholixEntityId] = summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a, null)).toList + if (summaryObject.getAuthor!= null && !summaryObject.getAuthor.isEmpty){ + val l:List[ScholixEntityId] = summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a,null)).toList if (l.nonEmpty) r.setCreator(l.asJava) } - if (summaryObject.getDate != null && !summaryObject.getDate.isEmpty) + if (summaryObject.getDate!= null && !summaryObject.getDate.isEmpty) r.setPublicationDate(summaryObject.getDate.get(0)) - if (summaryObject.getPublisher != null && !summaryObject.getPublisher.isEmpty) { - val plist: List[ScholixEntityId] = summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList + if (summaryObject.getPublisher!= null && !summaryObject.getPublisher.isEmpty) + { + val plist:List[ScholixEntityId] =summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList if (plist.nonEmpty) r.setPublisher(plist.asJava) } - if (summaryObject.getDatasources != null && !summaryObject.getDatasources.isEmpty) { + if (summaryObject.getDatasources!= null && !summaryObject.getDatasources.isEmpty) { - val l: List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala.map(c => new ScholixCollectedFrom( + val l:List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala.map(c => new ScholixCollectedFrom( new ScholixEntityId(c.getDatasourceName, List(new ScholixIdentifier(c.getDatasourceId, DNET_IDENTIFIER_SCHEMA, null)).asJava) , "collected", "complete" @@ -224,9 +228,12 @@ object ScholixUtils { } - def scholixFromSource(relation: Relation, source: ScholixSummary): Scholix = { - if (relation == null || source == null) + + + def scholixFromSource(relation:Relation, source:ScholixSummary):Scholix = { + + if (relation== null || source== null) return null val s = new Scholix @@ -246,9 +253,9 @@ object ScholixUtils { s.setPublicationDate(d) - if (source.getPublisher != null && !source.getPublisher.isEmpty) { + if (source.getPublisher!= null && !source.getPublisher.isEmpty) { val l: List[ScholixEntityId] = source.getPublisher.asScala - .map { + .map{ p => new ScholixEntityId(p, null) }(collection.breakOut) @@ -258,7 +265,7 @@ object ScholixUtils { } val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null) - if (semanticRelation == null) + if (semanticRelation== null) return null s.setRelationship(new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse)) s.setSource(generateScholixResourceFromSummary(source)) @@ -267,8 +274,8 @@ object ScholixUtils { } - def findURLForPID(pidValue: List[StructuredProperty], urls: List[String]): List[(StructuredProperty, String)] = { - pidValue.map { + def findURLForPID(pidValue:List[StructuredProperty], urls:List[String]):List[(StructuredProperty, String)] = { + pidValue.map{ p => val pv = p.getValue @@ -278,67 +285,67 @@ object ScholixUtils { } - def extractTypedIdentifierFromInstance(r: Result): List[ScholixIdentifier] = { + def extractTypedIdentifierFromInstance(r:Result):List[ScholixIdentifier] = { if (r.getInstance() == null || r.getInstance().isEmpty) return List() - r.getInstance().asScala.filter(i => i.getUrl != null && !i.getUrl.isEmpty) - .filter(i => i.getPid != null && i.getUrl != null) + r.getInstance().asScala.filter(i => i.getUrl!= null && !i.getUrl.isEmpty) + .filter(i => i.getPid!= null && i.getUrl != null) .flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList)) .map(i => new ScholixIdentifier(i._1.getValue, i._1.getQualifier.getClassid, i._2)).distinct.toList } - def resultToSummary(r: Result): ScholixSummary = { + def resultToSummary(r:Result):ScholixSummary = { val s = new ScholixSummary s.setId(r.getId) if (r.getPid == null || r.getPid.isEmpty) return null - val persistentIdentifiers: List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r) + val persistentIdentifiers:List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r) if (persistentIdentifiers.isEmpty) return null s.setLocalIdentifier(persistentIdentifiers.asJava) - if (r.isInstanceOf[Publication]) + if (r.isInstanceOf[Publication] ) s.setTypology(Typology.publication) else s.setTypology(Typology.dataset) s.setSubType(r.getInstance().get(0).getInstancetype.getClassname) - if (r.getTitle != null && r.getTitle.asScala.nonEmpty) { - val titles: List[String] = r.getTitle.asScala.map(t => t.getValue)(collection breakOut) + if (r.getTitle!= null && r.getTitle.asScala.nonEmpty) { + val titles:List[String] =r.getTitle.asScala.map(t => t.getValue)(collection breakOut) if (titles.nonEmpty) s.setTitle(titles.asJava) else - return null + return null } - if (r.getAuthor != null && !r.getAuthor.isEmpty) { - val authors: List[String] = r.getAuthor.asScala.map(a => a.getFullname)(collection breakOut) + if(r.getAuthor!= null && !r.getAuthor.isEmpty) { + val authors:List[String] = r.getAuthor.asScala.map(a=> a.getFullname)(collection breakOut) if (authors nonEmpty) s.setAuthor(authors.asJava) } if (r.getInstance() != null) { - val dt: List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut) + val dt:List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut) if (dt.nonEmpty) s.setDate(dt.distinct.asJava) } - if (r.getDescription != null && !r.getDescription.isEmpty) { - val d = r.getDescription.asScala.find(f => f != null && f.getValue != null) + if (r.getDescription!= null && !r.getDescription.isEmpty) { + val d = r.getDescription.asScala.find(f => f!= null && f.getValue!=null) if (d.isDefined) s.setDescription(d.get.getValue) } - if (r.getSubject != null && !r.getSubject.isEmpty) { - val subjects: List[SchemeValue] = r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut) + if (r.getSubject!= null && !r.getSubject.isEmpty) { + val subjects:List[SchemeValue] =r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut) if (subjects.nonEmpty) s.setSubject(subjects.asJava) } - if (r.getPublisher != null) + if (r.getPublisher!= null) s.setPublisher(List(r.getPublisher.getValue).asJava) - if (r.getCollectedfrom != null && !r.getCollectedfrom.isEmpty) { - val cf: List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut) + if (r.getCollectedfrom!= null && !r.getCollectedfrom.isEmpty) { + val cf:List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut) if (cf.nonEmpty) s.setDatasources(cf.distinct.asJava) } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala similarity index 96% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala index 7abce547f..a3a753a8a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -3,9 +3,13 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo} import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} +import javax.management.openmbean.OpenMBeanAttributeInfo import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.json4s import org.json4s.DefaultFormats +import eu.dnetlib.dhp.schema.common.ModelConstants import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.api.Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala similarity index 98% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 0922f2e19..5b00e9b6f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -4,9 +4,10 @@ import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats -import org.json4s.jackson.Serialization.write -import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} import org.junit.jupiter.api.Test +import org.junit.jupiter.api.Assertions._ +import org.json4s.jackson.Serialization.write class TestPreprocess extends java.io.Serializable{ diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index f1bd841d1..9a142d3c0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -159,7 +159,6 @@ class ResolveEntitiesTest extends Serializable { val datDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/dataset").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) - val td = datDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala similarity index 95% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala index 0d89cca85..b90827e81 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.sx.pangaea import eu.dnetlib.dhp.sx.graph.pangaea.PangaeaUtils import org.junit.jupiter.api.Test +import java.util.TimeZone import java.text.SimpleDateFormat import java.util.Date import scala.io.Source diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset index 2c73183e2..05c875148 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset @@ -1,3 +1,3 @@ -{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} -{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} -{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file From 0c0d561bc47c588d0049fcab1dc8e400efe1a7b7 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 19 Nov 2021 09:54:22 +0100 Subject: [PATCH 030/176] added public class into tests to create correct javadoc --- .../eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java | 2 +- .../oa/matchers/simple/EnrichMissingPublicationDateTest.java | 2 +- .../eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java | 2 +- .../test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java index 45bfc785f..52e9917bb 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java @@ -19,7 +19,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; @ExtendWith(MockitoExtension.class) -class UpdateMatcherTest { +public class UpdateMatcherTest { UpdateMatcher matcher = new EnrichMissingPublicationDate(); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java index 550ded9f4..5af81a31a 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java @@ -11,7 +11,7 @@ import org.junit.jupiter.api.Test; import eu.dnetlib.broker.objects.OaBrokerMainEntity; -class EnrichMissingPublicationDateTest { +public class EnrichMissingPublicationDateTest { final EnrichMissingPublicationDate matcher = new EnrichMissingPublicationDate(); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java index b532aa9f7..d93390e4a 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java @@ -8,7 +8,7 @@ import java.util.Arrays; import org.junit.jupiter.api.Test; -class SubscriptionUtilsTest { +public class SubscriptionUtilsTest { @Test void testVerifyListSimilar() { diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java index a8bc03e31..117bdeef4 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java @@ -9,7 +9,7 @@ import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerTypedValue; -class TrustUtilsTest { +public class TrustUtilsTest { private static final double THRESHOLD = 0.95; From fc03c99805c540d03b82ea269d4ff6b4f3eec8cd Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 19 Nov 2021 10:46:33 +0100 Subject: [PATCH 031/176] fixed javadocs url after deploying site --- dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md b/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md index 00e3ed877..c1813394b 100644 --- a/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md @@ -9,8 +9,8 @@ the following [schema](https://www.nlm.nih.gov/bsd/licensee/elements_description Parsing ------- -The resposible class of parsing is [PMParser](./scaladocs/#eu.dnetlib.dhp.sx.bio.pubmed.PMParser) that generates -an intermediate mapping of PubMed Article defined [here](/apidocs/eu/dnetlib/dhp/sx/bio/pubmed/package-summary.html) +The resposible class of parsing is [PMParser](/dnet-hadoop/scaladocs/#eu.dnetlib.dhp.sx.bio.pubmed.PMParser) that generates +an intermediate mapping of PubMed Article defined [here](/dnet-hadoop/apidocs/eu/dnetlib/dhp/sx/bio/pubmed/package-summary.html) Mapping From 9fae8721812010b790c4989cb7e38df8a0ec176b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 19 Nov 2021 11:25:50 +0100 Subject: [PATCH 032/176] [Graph Dump] changed to mirror the changes in the model --- .../dnetlib/dhp/common/GraphResultMapper.java | 413 ------------------ .../dhp/oa/graph/dump/ResultMapper.java | 45 +- .../dump/complete/DumpGraphEntities.java | 6 +- pom.xml | 2 +- 4 files changed, 25 insertions(+), 441 deletions(-) delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java deleted file mode 100644 index 8ceee5c8a..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java +++ /dev/null @@ -1,413 +0,0 @@ - -package eu.dnetlib.dhp.common; - -import java.io.Serializable; -import java.util.*; -import java.util.stream.Collectors; - -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.dump.oaf.*; -import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance; -import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Journal; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; - -public class GraphResultMapper implements Serializable { - - public static Result map( - E in) { - - CommunityResult out = new CommunityResult(); - - eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in; - Optional ort = Optional.ofNullable(input.getResulttype()); - if (ort.isPresent()) { - switch (ort.get().getClassid()) { - case "publication": - Optional journal = Optional - .ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal()); - if (journal.isPresent()) { - Journal j = journal.get(); - Container c = new Container(); - c.setConferencedate(j.getConferencedate()); - c.setConferenceplace(j.getConferenceplace()); - c.setEdition(j.getEdition()); - c.setEp(j.getEp()); - c.setIss(j.getIss()); - c.setIssnLinking(j.getIssnLinking()); - c.setIssnOnline(j.getIssnOnline()); - c.setIssnPrinted(j.getIssnPrinted()); - c.setName(j.getName()); - c.setSp(j.getSp()); - c.setVol(j.getVol()); - out.setContainer(c); - out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname()); - } - break; - case "dataset": - eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input; - Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue())); - Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue())); - - out - .setGeolocation( - Optional - .ofNullable(id.getGeolocation()) - .map( - igl -> igl - .stream() - .filter(Objects::nonNull) - .map(gli -> { - GeoLocation gl = new GeoLocation(); - gl.setBox(gli.getBox()); - gl.setPlace(gli.getPlace()); - gl.setPoint(gli.getPoint()); - return gl; - }) - .collect(Collectors.toList())) - .orElse(null)); - - out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname()); - break; - case "software": - - eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input; - Optional - .ofNullable(is.getCodeRepositoryUrl()) - .ifPresent(value -> out.setCodeRepositoryUrl(value.getValue())); - Optional - .ofNullable(is.getDocumentationUrl()) - .ifPresent( - value -> out - .setDocumentationUrl( - value - .stream() - .map(Field::getValue) - .collect(Collectors.toList()))); - - Optional - .ofNullable(is.getProgrammingLanguage()) - .ifPresent(value -> out.setProgrammingLanguage(value.getClassid())); - - out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname()); - break; - case "other": - - eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input; - out - .setContactgroup( - Optional - .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - - out - .setContactperson( - Optional - .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - out - .setTool( - Optional - .ofNullable(ir.getTool()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - - out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); - - break; - } - - Optional - .ofNullable(input.getAuthor()) - .ifPresent( - ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList()))); - - // I do not map Access Right UNKNOWN or OTHER - - Optional oar = Optional.ofNullable(input.getBestaccessright()); - if (oar.isPresent()) { - if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) { - String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); - out - .setBestaccessright( - AccessRight - .newInstance( - code, - Constants.coarCodeLabelMap.get(code), - Constants.COAR_ACCESS_RIGHT_SCHEMA)); - } - } - - final List contributorList = new ArrayList<>(); - Optional - .ofNullable(input.getContributor()) - .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue()))); - out.setContributor(contributorList); - - Optional - .ofNullable(input.getCountry()) - .ifPresent( - value -> out - .setCountry( - value - .stream() - .map( - c -> { - if (c.getClassid().equals((ModelConstants.UNKNOWN))) { - return null; - } - Country country = new Country(); - country.setCode(c.getClassid()); - country.setLabel(c.getClassname()); - Optional - .ofNullable(c.getDataInfo()) - .ifPresent( - provenance -> country - .setProvenance( - Provenance - .newInstance( - provenance - .getProvenanceaction() - .getClassname(), - c.getDataInfo().getTrust()))); - return country; - }) - .filter(Objects::nonNull) - .collect(Collectors.toList()))); - - final List coverageList = new ArrayList<>(); - Optional - .ofNullable(input.getCoverage()) - .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue()))); - out.setCoverage(coverageList); - - out.setDateofcollection(input.getDateofcollection()); - - final List descriptionList = new ArrayList<>(); - Optional - .ofNullable(input.getDescription()) - .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue()))); - out.setDescription(descriptionList); - Optional> oStr = Optional.ofNullable(input.getEmbargoenddate()); - if (oStr.isPresent()) { - out.setEmbargoenddate(oStr.get().getValue()); - } - - final List formatList = new ArrayList<>(); - Optional - .ofNullable(input.getFormat()) - .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); - out.setFormat(formatList); - out.setId(input.getId()); - out.setOriginalId(input.getOriginalId()); - - Optional> oInst = Optional - .ofNullable(input.getInstance()); - - if (oInst.isPresent()) { - out - .setInstance( - oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList())); - - } - - Optional oL = Optional.ofNullable(input.getLanguage()); - if (oL.isPresent()) { - eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get(); - out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname())); - } - Optional oLong = Optional.ofNullable(input.getLastupdatetimestamp()); - if (oLong.isPresent()) { - out.setLastupdatetimestamp(oLong.get()); - } - Optional> otitle = Optional.ofNullable(input.getTitle()); - if (otitle.isPresent()) { - List iTitle = otitle - .get() - .stream() - .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) - .collect(Collectors.toList()); - if (!iTitle.isEmpty()) { - out.setMaintitle(iTitle.get(0).getValue()); - } - - iTitle = otitle - .get() - .stream() - .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) - .collect(Collectors.toList()); - if (!iTitle.isEmpty()) { - out.setSubtitle(iTitle.get(0).getValue()); - } - - } - - List pids = new ArrayList<>(); - Optional - .ofNullable(input.getPid()) - .ifPresent( - value -> value - .stream() - .forEach( - p -> pids - .add( - ControlledField - .newInstance(p.getQualifier().getClassid(), p.getValue())))); - out.setPid(pids); - oStr = Optional.ofNullable(input.getDateofacceptance()); - if (oStr.isPresent()) { - out.setPublicationdate(oStr.get().getValue()); - } - oStr = Optional.ofNullable(input.getPublisher()); - if (oStr.isPresent()) { - out.setPublisher(oStr.get().getValue()); - } - - List sourceList = new ArrayList<>(); - Optional - .ofNullable(input.getSource()) - .ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue()))); - // out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList())); - List subjectList = new ArrayList<>(); - Optional - .ofNullable(input.getSubject()) - .ifPresent( - value -> value - .forEach(s -> subjectList.add(getSubject(s)))); - - out.setSubjects(subjectList); - - out.setType(input.getResulttype().getClassid()); - } - - out - .setCollectedfrom( - input - .getCollectedfrom() - .stream() - .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue())) - .collect(Collectors.toList())); - - return out; - - } - - private static CommunityInstance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i) { - CommunityInstance instance = new CommunityInstance(); - - setCommonValue(i, instance); - - instance - .setCollectedfrom( - KeyValue - .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue())); - - instance - .setHostedby( - KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue())); - - return instance; - - } - - private static void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) { - Optional opAr = Optional - .ofNullable(i.getAccessright()); - if (opAr.isPresent()) { - if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) { - String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid()); - instance - .setAccessright( - AccessRight - .newInstance( - code, - Constants.coarCodeLabelMap.get(code), - Constants.COAR_ACCESS_RIGHT_SCHEMA)); - } - } - - Optional - .ofNullable(i.getLicense()) - .ifPresent(value -> instance.setLicense(value.getValue())); - Optional - .ofNullable(i.getDateofacceptance()) - .ifPresent(value -> instance.setPublicationdate(value.getValue())); - Optional - .ofNullable(i.getRefereed()) - .ifPresent(value -> instance.setRefereed(value.getClassname())); - Optional - .ofNullable(i.getInstancetype()) - .ifPresent(value -> instance.setType(value.getClassname())); - Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value)); - - } - - private static Subject getSubject(StructuredProperty s) { - Subject subject = new Subject(); - subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue())); - Optional di = Optional.ofNullable(s.getDataInfo()); - if (di.isPresent()) { - Provenance p = new Provenance(); - p.setProvenance(di.get().getProvenanceaction().getClassname()); - p.setTrust(di.get().getTrust()); - subject.setProvenance(p); - } - - return subject; - } - - private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) { - Author a = new Author(); - a.setFullname(oa.getFullname()); - a.setName(oa.getName()); - a.setSurname(oa.getSurname()); - a.setRank(oa.getRank()); - - Optional> oPids = Optional - .ofNullable(oa.getPid()); - if (oPids.isPresent()) { - Pid pid = getOrcid(oPids.get()); - if (pid != null) { - a.setPid(pid); - } - } - - return a; - } - - private static Pid getOrcid(List p) { - for (StructuredProperty pid : p) { - if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID)) { - Optional di = Optional.ofNullable(pid.getDataInfo()); - if (di.isPresent()) { - return Pid - .newInstance( - ControlledField - .newInstance( - pid.getQualifier().getClassid(), - pid.getValue()), - Provenance - .newInstance( - di.get().getProvenanceaction().getClassname(), - di.get().getTrust())); - } else { - return Pid - .newInstance( - ControlledField - .newInstance( - pid.getQualifier().getClassid(), - pid.getValue()) - - ); - } - - } - } - return null; - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 3a29a2bbf..7dc63a019 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -16,6 +16,7 @@ import eu.dnetlib.dhp.schema.dump.oaf.Country; import eu.dnetlib.dhp.schema.dump.oaf.GeoLocation; import eu.dnetlib.dhp.schema.dump.oaf.Instance; import eu.dnetlib.dhp.schema.dump.oaf.KeyValue; +import eu.dnetlib.dhp.schema.dump.oaf.Measure; import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute; import eu.dnetlib.dhp.schema.dump.oaf.Qualifier; import eu.dnetlib.dhp.schema.dump.oaf.Result; @@ -82,7 +83,7 @@ public class ResultMapper implements Serializable { if (c.getClassid().equals((ModelConstants.UNKNOWN))) { return null; } - Country country = new Country(); + ResultCountry country = new ResultCountry(); country.setCode(c.getClassid()); country.setLabel(c.getClassname()); Optional @@ -136,10 +137,6 @@ public class ResultMapper implements Serializable { .stream() .filter(s -> !s.startsWith("50|")) .collect(Collectors.toList()))); -// Optional -// .ofNullable( -// input.getOriginalId().stream().filter(s -> !s.startsWith("50|")).collect(Collectors.toList())) -// .ifPresent(v -> out.setOriginalId(v)); Optional> oInst = Optional .ofNullable(input.getInstance()); @@ -163,7 +160,7 @@ public class ResultMapper implements Serializable { Optional oL = Optional.ofNullable(input.getLanguage()); if (oL.isPresent()) { eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get(); - out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname())); + out.setLanguage(Language.newInstance(language.getClassid(), language.getClassname())); } Optional oLong = Optional.ofNullable(input.getLastupdatetimestamp()); if (oLong.isPresent()) { @@ -199,7 +196,7 @@ public class ResultMapper implements Serializable { value .stream() .map( - p -> ControlledField + p -> ResultPid .newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList()))); @@ -452,14 +449,14 @@ public class ResultMapper implements Serializable { Constants.coarCodeLabelMap.get(code), Constants.COAR_ACCESS_RIGHT_SCHEMA)); - Optional> mes = Optional.ofNullable(i.getMeasures()); - if (mes.isPresent()){ - List measure = new ArrayList<>(); + Optional> mes = Optional.ofNullable(i.getMeasures()); + if (mes.isPresent()) { + List measure = new ArrayList<>(); mes - .get() - .forEach( - m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); - instance.setMeasures(measure ); + .get() + .forEach( + m -> m.getUnit().forEach(u -> measure.add(Measure.newInstance(m.getId(), u.getValue())))); + instance.setMeasures(measure); } if (opAr.get().getOpenAccessRoute() != null) { @@ -489,7 +486,7 @@ public class ResultMapper implements Serializable { .setPid( pid .stream() - .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .map(p -> ResultPid.newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList()))); Optional @@ -499,7 +496,7 @@ public class ResultMapper implements Serializable { .setAlternateIdentifier( ai .stream() - .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .map(p -> AlternateIdentifier.newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList()))); Optional @@ -579,7 +576,7 @@ public class ResultMapper implements Serializable { private static Subject getSubject(StructuredProperty s) { Subject subject = new Subject(); - subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue())); + subject.setSubject(SubjectSchemeValue.newInstance(s.getQualifier().getClassid(), s.getValue())); Optional di = Optional.ofNullable(s.getDataInfo()); if (di.isPresent()) { Provenance p = new Provenance(); @@ -601,7 +598,7 @@ public class ResultMapper implements Serializable { Optional> oPids = Optional .ofNullable(oa.getPid()); if (oPids.isPresent()) { - Pid pid = getOrcid(oPids.get()); + AuthorPid pid = getOrcid(oPids.get()); if (pid != null) { a.setPid(pid); } @@ -610,12 +607,12 @@ public class ResultMapper implements Serializable { return a; } - private static Pid getAuthorPid(StructuredProperty pid) { + private static AuthorPid getAuthorPid(StructuredProperty pid) { Optional di = Optional.ofNullable(pid.getDataInfo()); if (di.isPresent()) { - return Pid + return AuthorPid .newInstance( - ControlledField + AuthorPidSchemeValue .newInstance( pid.getQualifier().getClassid(), pid.getValue()), @@ -624,9 +621,9 @@ public class ResultMapper implements Serializable { di.get().getProvenanceaction().getClassname(), di.get().getTrust())); } else { - return Pid + return AuthorPid .newInstance( - ControlledField + AuthorPidSchemeValue .newInstance( pid.getQualifier().getClassid(), pid.getValue()) @@ -635,7 +632,7 @@ public class ResultMapper implements Serializable { } } - private static Pid getOrcid(List p) { + private static AuthorPid getOrcid(List p) { List pidList = p.stream().map(pid -> { if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID) || (pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java index 7f64db41c..e6b4a9b70 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java @@ -140,7 +140,7 @@ public class DumpGraphEntities implements Serializable { .ofNullable(d.getDatasourcetype()) .ifPresent( dsType -> datasource - .setDatasourcetype(ControlledField.newInstance(dsType.getClassid(), dsType.getClassname()))); + .setDatasourcetype(DatasourceSchemeValue.newInstance(dsType.getClassid(), dsType.getClassname()))); Optional .ofNullable(d.getOpenairecompatibility()) @@ -499,7 +499,7 @@ public class DumpGraphEntities implements Serializable { .ifPresent( value -> { if (!value.getClassid().equals(Constants.UNKNOWN)) { - organization.setCountry(Qualifier.newInstance(value.getClassid(), value.getClassname())); + organization.setCountry(Country.newInstance(value.getClassid(), value.getClassname())); } }); @@ -515,7 +515,7 @@ public class DumpGraphEntities implements Serializable { .setPid( value .stream() - .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList()))); return organization; diff --git a/pom.xml b/pom.xml index 3acbdee9e..a1e9ccc6a 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.9.23] + [2.9.24-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From 2b46b87f56a209c5ca91803920121b96a5535306 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 19 Nov 2021 11:30:29 +0100 Subject: [PATCH 033/176] fixed filtering criteria applied in SparkCopyRelationsNoOpenorgs to keep the parent/child relations from OpenOrgs --- .../dhp/oa/dedup/AbstractSparkAction.java | 24 +++++++++++++++---- .../dedup/SparkCopyRelationsNoOpenorgs.java | 2 +- .../oa/dedup/SparkOpenorgsProvisionTest.java | 24 ++++++++++++++++--- .../openorgs/provision/relation/part-00000 | 2 ++ 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java index 6a9b21b00..136413376 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java @@ -139,14 +139,28 @@ abstract class AbstractSparkAction implements Serializable { protected boolean isOpenorgs(Relation rel) { return Optional .ofNullable(rel.getCollectedfrom()) - .map( - c -> c - .stream() - .filter(Objects::nonNull) - .anyMatch(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue()))) + .map(c -> isCollectedFromOpenOrgs(c)) .orElse(false); } + protected boolean isOpenorgsDedupRel(Relation rel) { + return isOpenorgs(rel) && isOpenOrgsDedupMergeRelation(rel); + } + + private boolean isCollectedFromOpenOrgs(List c) { + return c + .stream() + .filter(Objects::nonNull) + .anyMatch(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue())); + } + + private boolean isOpenOrgsDedupMergeRelation(Relation rel) { + return ModelConstants.ORG_ORG_RELTYPE.equals(rel.getRelType()) && + ModelConstants.DEDUP.equals(rel.getSubRelType()) + && (ModelConstants.IS_MERGED_IN.equals(rel.getRelClass()) || + ModelConstants.MERGES.equals(rel.getRelClass())); + } + protected static Boolean parseECField(Field field) { if (field == null) return null; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java index 9cc003bf6..62cbb5bff 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java @@ -61,7 +61,7 @@ public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction { .textFile(relationPath) .map(patchRelFn(), Encoders.bean(Relation.class)) .toJavaRDD() - .filter(x -> !isOpenorgs(x)); + .filter(x -> !isOpenorgsDedupRel(x)); if (log.isDebugEnabled()) { log.debug("Number of non-Openorgs relations collected: {}", simRels.count()); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java index 2349ffebe..3cd695524 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java @@ -11,6 +11,8 @@ import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; +import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -29,6 +31,8 @@ import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; +import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -226,9 +230,23 @@ public class SparkOpenorgsProvisionTest implements Serializable { new SparkCopyRelationsNoOpenorgs(parser, spark).run(isLookUpService); - long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); + final JavaRDD rels = jsc.textFile(testDedupGraphBasePath + "/relation"); + + long relations = rels.count(); + + final ObjectMapper mapper = new ObjectMapper(); + List relTypes = rels + .map(r -> mapper.readValue(r, Relation.class)) + .map( + r -> r.getRelType() + "_" + r.getSubRelType() + "_" + r.getRelClass() + "|" + + r.getCollectedfrom().stream().map(cf -> cf.getValue()).collect(Collectors.joining(","))) + .distinct() + .collect(); + + relTypes.forEach(r -> System.out.println("relType: " + r)); + + assertEquals(2382, relations); - assertEquals(2380, relations); } @Test @@ -250,7 +268,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - assertEquals(4894, relations); + assertEquals(4896, relations); // check deletedbyinference final Dataset mergeRels = spark diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs/provision/relation/part-00000 b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs/provision/relation/part-00000 index 67d491ca2..35d92089d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs/provision/relation/part-00000 +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs/provision/relation/part-00000 @@ -2518,3 +2518,5 @@ {"subRelType": "dedup", "relClass": "isMergedIn", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, "target": "20|openorgs____::5c351d85f02db01ca291acd119f0bd78", "lastupdatetimestamp": 1617801137807, "relType": "organizationOrganization", "source": "20|opendoar____::37248e2f6987b18670dd2b8a51d6ef55", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": "OpenOrgs Database"}], "validated": false, "properties": []} {"subRelType": "dedup", "relClass": "merges", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, "target": "20|corda_______::6acb33e6ea8c6fcdabc891c80d083c64", "lastupdatetimestamp": 1617801137807, "relType": "organizationOrganization", "source": "20|openorgs____::e38c1a27fcb0f0ab218828e4f5fc7be9", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": "OpenOrgs Database"}], "validated": false, "properties": []} {"subRelType": "dedup", "relClass": "isMergedIn", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, "target": "20|openorgs____::e38c1a27fcb0f0ab218828e4f5fc7be9", "lastupdatetimestamp": 1617801137807, "relType": "organizationOrganization", "source": "20|corda_______::6acb33e6ea8c6fcdabc891c80d083c64", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": "OpenOrgs Database"}], "validated": false, "properties": []} +{"subRelType": "relationship", "relClass": "IsParentOf", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, "target": "20|openorgs____::e38c1a27fcb0f0ab218828e4f5fc7be9", "lastupdatetimestamp": 1617801137807, "relType": "organizationOrganization", "source": "20|corda_______::6acb33e6ea8c6fcdabc891c80d083c64", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": "OpenOrgs Database"}], "validated": false, "properties": []} +{"subRelType": "relationship", "relClass": "IsChildOf", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, "target": "20|corda_______::6acb33e6ea8c6fcdabc891c80d083c64", "lastupdatetimestamp": 1617801137807, "relType": "organizationOrganization", "source": "20|openorgs____::e38c1a27fcb0f0ab218828e4f5fc7be9", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": "OpenOrgs Database"}], "validated": false, "properties": []} From f4538f3c4c29df211b9c8eb5f2560620677ff642 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 19 Nov 2021 11:33:10 +0100 Subject: [PATCH 034/176] cleanup --- .../dhp/oa/dedup/SparkOpenorgsProvisionTest.java | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java index 3cd695524..2a9f34dee 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java @@ -232,20 +232,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { final JavaRDD rels = jsc.textFile(testDedupGraphBasePath + "/relation"); - long relations = rels.count(); - - final ObjectMapper mapper = new ObjectMapper(); - List relTypes = rels - .map(r -> mapper.readValue(r, Relation.class)) - .map( - r -> r.getRelType() + "_" + r.getSubRelType() + "_" + r.getRelClass() + "|" + - r.getCollectedfrom().stream().map(cf -> cf.getValue()).collect(Collectors.joining(","))) - .distinct() - .collect(); - - relTypes.forEach(r -> System.out.println("relType: " + r)); - - assertEquals(2382, relations); + assertEquals(2382, rels.count()); } From 4542a2338ba6dabdbc5f06e26e992ed8b7c26f6b Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 19 Nov 2021 13:44:08 +0100 Subject: [PATCH 035/176] updated site configuration to deploy on website --- dhp-build/dhp-code-style/pom.xml | 11 +++++++++-- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 4 ++-- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 7a6a32e0e..25151fb87 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -24,11 +24,18 @@ DHPSite - file://${dhp.site.stage.path}/site/dhp-build/dhp-code-style + ${dhp.site.stage.path}/dhp-build/dhp-code-style + + + org.apache.maven.wagon + wagon-ssh + 2.10 + + @@ -47,7 +54,7 @@ UTF-8 - /tmp/dhp-site + sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop \ No newline at end of file diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index fed689a06..1b39f9f0a 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -24,7 +24,7 @@ DHPSite - file://${dhp.site.stage.path}/site/dhp-build + ${dhp.site.stage.path}/dhp-build diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 686b89f6b..4a13b3459 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -16,7 +16,7 @@ DHPSite - file://${dhp.site.stage.path}/site/dhp-common + ${dhp.site.stage.path}/dhp-common diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 89ba2bf70..53d029467 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -18,7 +18,7 @@ DHPSite - file://${dhp.site.stage.path}/site/dhp-workflows + ${dhp.site.stage.path}/dhp-workflows diff --git a/pom.xml b/pom.xml index f7e3c6226..9e9bbaa16 100644 --- a/pom.xml +++ b/pom.xml @@ -721,7 +721,7 @@ DHPSite - file://${dhp.site.stage.path}/site/ + ${dhp.site.stage.path}/ @@ -738,7 +738,7 @@ - /tmp/dhp-site + sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop UTF-8 UTF-8 3.6.0 From 75298ec44272b2589821478d1ba0be85200731b3 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 19 Nov 2021 14:48:44 +0100 Subject: [PATCH 036/176] added site.xml to code style --- dhp-build/dhp-code-style/src/site/site.xml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 dhp-build/dhp-code-style/src/site/site.xml diff --git a/dhp-build/dhp-code-style/src/site/site.xml b/dhp-build/dhp-code-style/src/site/site.xml new file mode 100644 index 000000000..634a2c154 --- /dev/null +++ b/dhp-build/dhp-code-style/src/site/site.xml @@ -0,0 +1,21 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + \ No newline at end of file From 155d8bf83fdfb6fe069aa062076b95054272129a Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 19 Nov 2021 14:51:08 +0100 Subject: [PATCH 037/176] updated maven site plugin on dhp-code-style --- dhp-build/dhp-code-style/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 25151fb87..db0097d64 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -46,7 +46,7 @@ org.apache.maven.plugins maven-site-plugin - 3.7.1 + 3.9.1 From 65ebe1019b859b93a13d07f6029d19057ac22782 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 19 Nov 2021 14:59:04 +0100 Subject: [PATCH 038/176] updated wagon-ssh version --- dhp-build/dhp-code-style/pom.xml | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index db0097d64..707033fd9 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -33,7 +33,7 @@ org.apache.maven.wagon wagon-ssh - 2.10 + 2.12 diff --git a/pom.xml b/pom.xml index 9e9bbaa16..12c0c9c1b 100644 --- a/pom.xml +++ b/pom.xml @@ -704,7 +704,7 @@ org.apache.maven.wagon wagon-ssh - 2.10 + 2.12 From 6110a2b984251c2a12fcb0c9e484900584f057cb Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 19 Nov 2021 15:31:45 +0100 Subject: [PATCH 039/176] reverted version --- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 707033fd9..db0097d64 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -33,7 +33,7 @@ org.apache.maven.wagon wagon-ssh - 2.12 + 2.10 diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index 1b39f9f0a..97fbdf45b 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -24,7 +24,7 @@ DHPSite - ${dhp.site.stage.path}/dhp-build + ${dhp.site.stage.path}/dhp-build/ diff --git a/pom.xml b/pom.xml index 12c0c9c1b..9e9bbaa16 100644 --- a/pom.xml +++ b/pom.xml @@ -704,7 +704,7 @@ org.apache.maven.wagon wagon-ssh - 2.12 + 2.10 From 0506fa2654e422ee12d84aca7393085edd932087 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 19 Nov 2021 15:56:25 +0100 Subject: [PATCH 040/176] [Graph Dump] changed to mirror the changes in the model --- .../eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java | 10 ++++------ .../dhp/oa/graph/dump/complete/DumpGraphEntities.java | 7 +------ 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 7dc63a019..aad3a8706 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -12,14 +12,12 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.dump.oaf.*; import eu.dnetlib.dhp.schema.dump.oaf.AccessRight; import eu.dnetlib.dhp.schema.dump.oaf.Author; -import eu.dnetlib.dhp.schema.dump.oaf.Country; import eu.dnetlib.dhp.schema.dump.oaf.GeoLocation; import eu.dnetlib.dhp.schema.dump.oaf.Instance; -import eu.dnetlib.dhp.schema.dump.oaf.KeyValue; import eu.dnetlib.dhp.schema.dump.oaf.Measure; import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute; -import eu.dnetlib.dhp.schema.dump.oaf.Qualifier; import eu.dnetlib.dhp.schema.dump.oaf.Result; +import eu.dnetlib.dhp.schema.dump.oaf.community.CfHbKeyValue; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.Context; @@ -231,7 +229,7 @@ public class ResultMapper implements Serializable { input .getCollectedfrom() .stream() - .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue())) + .map(cf -> CfHbKeyValue.newInstance(cf.getKey(), cf.getValue())) .collect(Collectors.toList())); Set communities = communityMap.keySet(); @@ -424,12 +422,12 @@ public class ResultMapper implements Serializable { instance .setCollectedfrom( - KeyValue + CfHbKeyValue .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue())); instance .setHostedby( - KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue())); + CfHbKeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue())); return instance; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java index e6b4a9b70..c9de86613 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java @@ -1,7 +1,6 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; -import static com.jayway.jsonpath.Filter.filter; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; @@ -11,9 +10,7 @@ import java.util.stream.Collectors; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; @@ -22,8 +19,6 @@ import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.common.ModelSupport; @@ -133,7 +128,7 @@ public class DumpGraphEntities implements Serializable { .ifPresent( pids -> pids .stream() - .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .map(p -> DatasourcePid.newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList())); Optional From fdb75b180e8fdb297bbbc1e4fc915133a01b1a95 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Sun, 21 Nov 2021 16:35:22 +0100 Subject: [PATCH 041/176] [Cleaning] added couple of tests for DOIBOOST publications --- .../clean/GraphCleaningFunctionsTest.java | 27 +++++++++++++++++++ .../dhp/oa/graph/clean/doiboostpub.json | 1 + .../dhp/oa/graph/clean/doiboostpub2.json | 1 + 3 files changed, 29 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index aa9535ef7..64fe7749b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -12,6 +12,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.MappableBlock; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -222,4 +224,29 @@ public class GraphCleaningFunctionsTest { .readLines( GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt")); } + + + + @Test + public void testCleanDoiBoost() throws IOException { + String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping); + Publication cleaned = GraphCleaningFunctions.cleanup(p_out); + + + Assertions.assertEquals(true,GraphCleaningFunctions.filter(cleaned) ); + } + + @Test + public void testCleanDoiBoost2() throws IOException { + String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping); + Publication cleaned = GraphCleaningFunctions.cleanup(p_out); + + + Assertions.assertEquals(true,GraphCleaningFunctions.filter(cleaned) ); + + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json new file mode 100644 index 000000000..061145d0f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json @@ -0,0 +1 @@ +{"context": [], "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "sysimport:actionset", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "resourcetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}, "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.1097/00132586-197308000-00003"}], "contributor": [], "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "relevantdate": [{"qualifier": {"classid": "created", "classname": "created", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2006-11-06T11:36:37Z"}], "collectedfrom": [{"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}], "id": "50|doi_________::b0baa0eb88a5788f0b8815560d2a32f2", "subject": [], "lastupdatetimestamp": 1620353302565, "author": [{"fullname": "N. S. AGRUSS", "surname": "AGRUSS", "name": "N. S.", "rank": 1}, {"fullname": "E. Y. ROSIN", "surname": "ROSIN", "name": "E. Y.", "rank": 2}, {"fullname": "R. J. ADOLPH", "surname": "ADOLPH", "name": "R. J.", "rank": 3}, {"fullname": "N. O. FOWLER", "surname": "FOWLER", "name": "N. O.", "rank": 4}], "instance": [{"hostedby": {"key": "10|issn___print::b8cee613d4f898f8c03956d57ea69be2", "value": "Survey of Anesthesiology"}, "url": ["https://doi.org/10.1097/00132586-197308000-00003"], "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.1097/00132586-197308000-00003"}], "dateofacceptance": {"value": "2006-11-06T11:36:37Z"}, "collectedfrom": {"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "dateofcollection": "2021-05-07T02:08:22Z", "fulltext": [], "description": [], "format": [], "journal": {"issnPrinted": "0039-6206", "vol": "17", "sp": "304", "name": "Survey of Anesthesiology"}, "measures": [], "coverage": [], "externalReference": [], "publisher": {"value": "Ovid Technologies (Wolters Kluwer Health)"}, "resulttype": {"classid": "publication", "classname": "publication", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "country": [], "extraInfo": [], "originalId": ["10.1097/00132586-197308000-00003", "50|doiboost____::b0baa0eb88a5788f0b8815560d2a32f2"], "source": [{"value": "Crossref"}], "dateofacceptance": {"value": "2006-11-06T11:36:37Z"}, "title": [{"qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "SIGNIFICANCE OF CHRONIC SINUS BRADYCARDIA IN ELDERLY PEOPLE"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json new file mode 100644 index 000000000..cf81b0286 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json @@ -0,0 +1 @@ +{"context": [], "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "sysimport:actionset", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "resourcetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}, "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.2143/tvg.62.1.5002364"}], "contributor": [], "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "relevantdate": [{"qualifier": {"classid": "created", "classname": "created", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2007-08-20T08:35:04Z"}, {"qualifier": {"classid": "published-online", "classname": "published-online", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2006-01-01"}], "collectedfrom": [{"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}], "id": "50|doi_________::4972b0ca81b96b225aed8038bb965656", "subject": [{"qualifier": {"classid": "keywords", "classname": "keywords", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "General Medicine"}], "lastupdatetimestamp": 1620381522840, "author": [{"fullname": "null VERHAMME P", "surname": "VERHAMME P", "rank": 1}], "instance": [{"hostedby": {"key": "10|issn__online::7ec728ad1ac65c60cd563a5137111125", "value": "Tijdschrift voor Geneeskunde"}, "url": ["https://doi.org/10.2143/tvg.62.1.5002364"], "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.2143/tvg.62.1.5002364"}], "dateofacceptance": {"value": "2006-01-01"}, "collectedfrom": {"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "dateofcollection": "2021-05-07T09:58:42Z", "fulltext": [], "description": [], "format": [], "journal": {"vol": "62", "sp": "55", "issnOnline": "0371-683X", "ep": "61", "name": "Tijdschrift voor Geneeskunde"}, "measures": [], "coverage": [], "externalReference": [], "publisher": {"value": "Peeters Publishers"}, "resulttype": {"classid": "publication", "classname": "publication", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "country": [], "extraInfo": [], "originalId": ["10.2143/tvg.62.1.5002364", "50|doiboost____::4972b0ca81b96b225aed8038bb965656"], "source": [{"value": "Crossref"}], "dateofacceptance": {"value": "2006-01-01"}, "title": [{"qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "Antitrombotica: nieuwe moleculen"}]} \ No newline at end of file From 35e20b0647ac97ffefddf0ca5cdc1e15488cf6d1 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 22 Nov 2021 11:48:55 +0100 Subject: [PATCH 042/176] updated resolution wf: - generate a new version of the graph - changed merge from union to join --- .../resolution/SparkResolveEntities.scala | 72 +++++++++---------- .../resolution/SparkResolveRelation.scala | 12 ++-- .../graph/resolution/oozie_app/workflow.xml | 6 ++ .../resolution/resolve_entities_params.json | 3 +- .../resolution/resolve_relations_params.json | 3 +- .../resolution/ResolveEntitiesTest.scala | 48 ++++++++++++- 6 files changed, 97 insertions(+), 47 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala index 316b8afed..f8ebb6800 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -14,7 +14,7 @@ import org.slf4j.{Logger, LoggerFactory} object SparkResolveEntities { val mapper = new ObjectMapper() - val entities = List(EntityType.dataset,EntityType.publication, EntityType.software, EntityType.otherresearchproduct) + val entities = List(EntityType.dataset, EntityType.publication, EntityType.software, EntityType.otherresearchproduct) def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) @@ -36,25 +36,19 @@ object SparkResolveEntities { val unresolvedPath = parser.get("unresolvedPath") log.info(s"unresolvedPath -> $unresolvedPath") + val targetPath = parser.get("targetPath") + log.info(s"targetPath -> $targetPath") + + val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) fs.mkdirs(new Path(workingPath)) resolveEntities(spark, workingPath, unresolvedPath) - generateResolvedEntities(spark, workingPath, graphBasePath) - - // TO BE conservative we keep the original entities in the working dir - // and save the resolved entities on the graphBasePath - //In future these lines of code should be removed - entities.foreach { - e => - fs.rename(new Path(s"$graphBasePath/$e"), new Path(s"$workingPath/${e}_old")) - fs.rename(new Path(s"$workingPath/resolvedGraph/$e"), new Path(s"$graphBasePath/$e")) - } - -} + generateResolvedEntities(spark, workingPath, graphBasePath, targetPath) + } -def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { + def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ @@ -71,37 +65,43 @@ def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: St } - def deserializeObject(input:String, entity:EntityType ) :Result = { + def deserializeObject(input: String, entity: EntityType): Result = { - entity match { - case EntityType.publication => mapper.readValue(input, classOf[Publication]) - case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) - case EntityType.software=> mapper.readValue(input, classOf[Software]) - case EntityType.otherresearchproduct=> mapper.readValue(input, classOf[OtherResearchProduct]) - } + entity match { + case EntityType.publication => mapper.readValue(input, classOf[Publication]) + case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) + case EntityType.software => mapper.readValue(input, classOf[Software]) + case EntityType.otherresearchproduct => mapper.readValue(input, classOf[OtherResearchProduct]) + } } - def generateResolvedEntities(spark:SparkSession, workingPath: String, graphBasePath:String) = { + def generateResolvedEntities(spark: SparkSession, workingPath: String, graphBasePath: String, targetPath:String) = { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ - val re:Dataset[Result] = spark.read.load(s"$workingPath/resolvedEntities").as[Result] + val re: Dataset[(String, Result)] = spark.read.load(s"$workingPath/resolvedEntities").as[Result].map(r => (r.getId, r)) entities.foreach { - e => + e => { + + val currentEntityDataset: Dataset[(String, Result)] = spark.read.text(s"$graphBasePath/$e").as[String].map(s => deserializeObject(s, e)).map(r => (r.getId, r)) + + + currentEntityDataset.joinWith(re, currentEntityDataset("_1").equalTo(re("_1")), "left").map(k => { + + val a = k._1 + val b = k._2 + if (b == null) + a._2 + else { + a._2.mergeFrom(b._2) + a._2 + } + }).map(r => mapper.writeValueAsString(r))(Encoders.STRING) + .write.mode(SaveMode.Overwrite).option("compression", "gzip").text(s"$targetPath/$e") + } + - spark.read.text(s"$graphBasePath/$e").as[String] - .map(s => deserializeObject(s, e)) - .union(re) - .groupByKey(_.getId) - .reduceGroups { - (x, y) => - x.mergeFrom(y) - x - }.map(_._2) - .filter(r => r.getClass.getSimpleName.toLowerCase != "result") - .map(r => mapper.writeValueAsString(r))(Encoders.STRING) - .write.mode(SaveMode.Overwrite).option("compression", "gzip").text(s"$workingPath/resolvedGraph/$e") } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index cd517dd5e..a194f2694 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -35,6 +35,9 @@ object SparkResolveRelation { val workingPath = parser.get("workingPath") log.info(s"workingPath -> $workingPath") + val targetPath = parser.get("targetPath") + log.info(s"targetPath -> $targetPath") + implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) import spark.implicits._ @@ -80,20 +83,13 @@ object SparkResolveRelation { .mode(SaveMode.Overwrite) .save(s"$workingPath/relation_resolved") - - // TO BE conservative we keep the original relation in the working dir - // and save the relation resolved on the graphBasePath - //In future this two line of code should be removed - - fs.rename(new Path(s"$graphBasePath/relation"), new Path(s"$workingPath/relation")) - spark.read.load(s"$workingPath/relation_resolved").as[Relation] .filter(r => !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")) .map(r => mapper.writeValueAsString(r)) .write .option("compression", "gzip") .mode(SaveMode.Overwrite) - .text(s"$graphBasePath/relation") + .text(s"$targetPath/relation") } def extractInstanceCF(input: String): List[(String, String)] = { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index ceb13c5e8..8859295bf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -8,6 +8,10 @@ unresolvedPath the path of the unresolved Entities + + targetPath + the target path after resolution + @@ -36,6 +40,7 @@ --masteryarn --graphBasePath${graphBasePath} --workingPath${workingDir} + --targetPath${targetPath} @@ -62,6 +67,7 @@ --graphBasePath${graphBasePath} --unresolvedPath${unresolvedPath} --workingPath${workingDir} + --targetPath${targetPath} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json index f38cc1291..67e315664 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json @@ -2,5 +2,6 @@ {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the source Path", "paramRequired": true}, {"paramName":"u", "paramLongName":"unresolvedPath", "paramDescription": "the source Path", "paramRequired": true}, - {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true} + {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the target path", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json index 1fbe20648..66a035da5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json @@ -1,5 +1,6 @@ [ {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the source Path", "paramRequired": true}, - {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true} + {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the target path", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index 46bf48974..0d7350fdd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -4,7 +4,7 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.common.EntityType import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils -import eu.dnetlib.dhp.schema.oaf.{Result, StructuredProperty} +import eu.dnetlib.dhp.schema.oaf.{Publication, Result, StructuredProperty} import org.apache.commons.io.FileUtils import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -154,19 +154,39 @@ class ResolveEntitiesTest extends Serializable { val t = pubDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + var ct = pubDS.count() + var et = pubDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() + + assertEquals(ct, et) + + val datDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/dataset").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) val td = datDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + ct = datDS.count() + et = datDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() + assertEquals(ct, et) val softDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/software").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.software)) val ts = softDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + ct = softDS.count() + et = softDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() + assertEquals(ct, et) val orpDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/otherresearchproduct").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.otherresearchproduct)) val to = orpDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + ct = orpDS.count() + et = orpDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() + assertEquals(ct, et) + + + + + assertEquals(0, t) assertEquals(2, td) assertEquals(1, ts) @@ -178,6 +198,32 @@ class ResolveEntitiesTest extends Serializable { + @Test + def testMerge():Unit = { + + val r = new Result + r.setSubject(List(OafMapperUtils.structuredProperty(FAKE_SUBJECT, OafMapperUtils.qualifier("fos","fosCS", "fossSchema", "fossiFIgo"), null)).asJava) + + val mapper = new ObjectMapper() + + val p = mapper.readValue(Source.fromInputStream(this.getClass.getResourceAsStream(s"publication")).mkString.lines.next(), classOf[Publication]) + + + r.mergeFrom(p) + + + println(mapper.writeValueAsString(r)) + + + + + + + + + } + + From 93fe8ce8b2011aa6108793ba7389eef278aa62ac Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 22 Nov 2021 15:50:43 +0100 Subject: [PATCH 043/176] entity resolution: fix test --- .../dhp/oa/graph/resolution/SparkResolveEntities.scala | 5 ++--- .../dhp/oa/graph/resolution/ResolveEntitiesTest.scala | 10 +++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala index f8ebb6800..be217c5c3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -80,12 +80,11 @@ object SparkResolveEntities { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ - val re: Dataset[(String, Result)] = spark.read.load(s"$workingPath/resolvedEntities").as[Result].map(r => (r.getId, r)) + val re: Dataset[(String, Result)] = spark.read.load(s"$workingPath/resolvedEntities").as[Result].map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) entities.foreach { e => { - val currentEntityDataset: Dataset[(String, Result)] = spark.read.text(s"$graphBasePath/$e").as[String].map(s => deserializeObject(s, e)).map(r => (r.getId, r)) - + val currentEntityDataset: Dataset[(String, Result)] = spark.read.text(s"$graphBasePath/$e").as[String].map(s => deserializeObject(s, e)).map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) currentEntityDataset.joinWith(re, currentEntityDataset("_1").equalTo(re("_1")), "left").map(k => { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index 0d7350fdd..c22243f94 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -146,11 +146,11 @@ class ResolveEntitiesTest extends Serializable { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) val m = new ObjectMapper() SparkResolveEntities.resolveEntities(spark,s"$workingDir/work", s"$workingDir/updates" ) - SparkResolveEntities.generateResolvedEntities(spark,s"$workingDir/work",s"$workingDir/graph" ) + SparkResolveEntities.generateResolvedEntities(spark,s"$workingDir/work",s"$workingDir/graph", s"$workingDir/target" ) - val pubDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/publication").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.publication)) + val pubDS:Dataset[Result] = spark.read.text(s"$workingDir/target/publication").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.publication)) val t = pubDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() @@ -161,21 +161,21 @@ class ResolveEntitiesTest extends Serializable { - val datDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/dataset").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) + val datDS:Dataset[Result] = spark.read.text(s"$workingDir/target/dataset").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) val td = datDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() ct = datDS.count() et = datDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() assertEquals(ct, et) - val softDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/software").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.software)) + val softDS:Dataset[Result] = spark.read.text(s"$workingDir/target/software").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.software)) val ts = softDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() ct = softDS.count() et = softDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() assertEquals(ct, et) - val orpDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/otherresearchproduct").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.otherresearchproduct)) + val orpDS:Dataset[Result] = spark.read.text(s"$workingDir/target/otherresearchproduct").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.otherresearchproduct)) val to = orpDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() From 483d3039d1a4e42656fff8a205f3b75d7271aed7 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 22 Nov 2021 15:55:24 +0100 Subject: [PATCH 044/176] entity resolution: added distcpt of missing entities in graph materialization --- .../graph/resolution/oozie_app/workflow.xml | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index 8859295bf..031b5a36f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -69,10 +69,44 @@ --workingPath${workingDir} --targetPath${targetPath} - + - + + + + + + + + ${nameNode}/${graphBasePath}/organization + ${nameNode}/${targetPath}/organization + + + + + + + + ${nameNode}/${graphBasePath}/project + ${nameNode}/${targetPath}/project + + + + + + + + ${nameNode}/${graphBasePath}/datasource + ${nameNode}/${targetPath}/datasource + + + + + + + + \ No newline at end of file From a7cf277d98aee3fc6bad29661aca9b96beccdc6d Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 22 Nov 2021 16:03:17 +0100 Subject: [PATCH 045/176] Datacite: Removed HostedBy Patch as described on ticket #7219, Now all the records will have hosted by Unknown Repository --- .../dhp/datacite/DataciteToOAFTransformation.scala | 14 ++++---------- .../dnetlib/dhp/datacite/DataciteToOAFTest.scala | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index 6ce4920ed..1af72e8d3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -41,7 +41,7 @@ case class FundingReferenceType(funderIdentifierType: Option[String], awardTitle case class DateType(date: Option[String], dateType: Option[String]) {} -case class HostedByMapType(openaire_id: String, datacite_name: String, official_name: String, similarity: Option[Float]) {} +//case class HostedByMapType(openaire_id: String, datacite_name: String, official_name: String, similarity: Option[Float]) {} object DataciteToOAFTransformation { @@ -90,17 +90,11 @@ object DataciteToOAFTransformation { } val mapper = new ObjectMapper() - val unknown_repository: HostedByMapType = HostedByMapType(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID, ModelConstants.UNKNOWN_REPOSITORY.getValue, ModelConstants.UNKNOWN_REPOSITORY.getValue, Some(1.0F)) val dataInfo: DataInfo = generateDataInfo("0.9") val DATACITE_COLLECTED_FROM: KeyValue = OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, "Datacite") - val hostedByMap: Map[String, HostedByMapType] = { - val s = Source.fromInputStream(getClass.getResourceAsStream("hostedBy_map.json")).mkString - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json: org.json4s.JValue = parse(s) - json.extract[Map[String, HostedByMapType]] - } + val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH) val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN) @@ -516,8 +510,8 @@ object DataciteToOAFTransformation { val access_rights_qualifier = if (aRights.isDefined) aRights.get else OafMapperUtils.accessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) if (client.isDefined) { - val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository) - instance.setHostedby(OafMapperUtils.keyValue(generateDSId(hb.openaire_id), hb.official_name)) + + instance.setHostedby(OafMapperUtils.keyValue(generateDSId(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID), ModelConstants.UNKNOWN_REPOSITORY.getValue)) instance.setCollectedfrom(DATACITE_COLLECTED_FROM) instance.setUrl(List(s"https://dx.doi.org/$doi").asJava) instance.setAccessright(access_rights_qualifier) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala index f21e9eab1..c7c6c6a92 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala @@ -36,7 +36,7 @@ class DataciteToOAFTest extends AbstractVocabularyTest{ @Test def testMapping() :Unit = { - val record =Source.fromInputStream(getClass.getResourceAsStream("record.json")).mkString + val record =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/record.json")).mkString From 2164a2a8898da176799bab3815244b234746eb45 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 25 Nov 2021 10:54:13 +0100 Subject: [PATCH 046/176] Datacite: Code Refactor generated a general SparkApplication Scala where all the spark scala have to inherit Commented a little the Datacite transformation code --- .../AbstractScalaApplication.scala | 37 +++++ .../application/SparkScalaApplication.scala | 35 +++++ .../dhp/datacite/DataciteModelConstants.scala | 134 ++++++++++++++++++ .../DataciteToOAFTransformation.scala | 127 +++-------------- .../GenerateDataciteDatasetSpark.scala | 86 +++++++---- .../src/main/resources/log4j.properties | 3 + .../dhp/datacite/DataciteToOAFTest.scala | 57 +++++++- 7 files changed, 337 insertions(+), 142 deletions(-) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala b/dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala new file mode 100644 index 000000000..44dad93eb --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala @@ -0,0 +1,37 @@ +package eu.dnetlib.dhp.application + +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession +import org.slf4j.Logger + +abstract class AbstractScalaApplication (val propertyPath:String, val args:Array[String], log:Logger) extends SparkScalaApplication { + + var parser: ArgumentApplicationParser = null + + var spark:SparkSession = null + + + def initialize():SparkScalaApplication = { + parser = parseArguments(args) + spark = createSparkSession() + this + } + + /** + * Utility for creating a spark session starting from parser + * + * @return a spark Session + */ + private def createSparkSession():SparkSession = { + require(parser!= null) + + val conf:SparkConf = new SparkConf() + val master = parser.get("master") + log.info(s"Creating Spark session: Master: $master") + SparkSession.builder().config(conf) + .appName(getClass.getSimpleName) + .master(master) + .getOrCreate() + } + +} \ No newline at end of file diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala b/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala new file mode 100644 index 000000000..247bacac0 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala @@ -0,0 +1,35 @@ +package eu.dnetlib.dhp.application + +import scala.io.Source + +/** + * This is the main Interface SparkApplication + * where all the Spark Scala class should inherit + * + */ +trait SparkScalaApplication { + /** + * This is the path in the classpath of the json + * describes all the argument needed to run + */ + val propertyPath: String + + /** + * Utility to parse the arguments using the + * property json in the classpath identified from + * the variable propertyPath + * + * @param args the list of arguments + */ + def parseArguments(args: Array[String]): ArgumentApplicationParser = { + val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream(propertyPath)).mkString) + parser.parseArgument(args) + parser + } + + /** + * Here all the spark applications runs this method + * where the whole logic of the spark node is defined + */ + def run(): Unit +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala new file mode 100644 index 000000000..0685a04a3 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala @@ -0,0 +1,134 @@ +package eu.dnetlib.dhp.datacite + +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.{DataInfo, KeyValue} +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils + +import java.io.InputStream +import java.time.format.DateTimeFormatter +import java.util.Locale +import java.util.regex.Pattern +import scala.io.Source + +/** + * This class represent the dataModel of the input Dataset of Datacite + * @param doi THE DOI + * @param timestamp timestamp of last update date + * @param isActive the record is active or deleted + * @param json the json native records + */ +case class DataciteType(doi: String, timestamp: Long, isActive: Boolean, json: String) {} + +/* + The following class are utility class used for the mapping from + json datacite to OAF Shema + */ +case class RelatedIdentifierType(relationType: String, relatedIdentifier: String, relatedIdentifierType: String) {} + +case class NameIdentifiersType(nameIdentifierScheme: Option[String], schemeUri: Option[String], nameIdentifier: Option[String]) {} + +case class CreatorType(nameType: Option[String], nameIdentifiers: Option[List[NameIdentifiersType]], name: Option[String], familyName: Option[String], givenName: Option[String], affiliation: Option[List[String]]) {} + +case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {} + +case class SubjectType(subject: Option[String], subjectScheme: Option[String]) {} + +case class DescriptionType(descriptionType: Option[String], description: Option[String]) {} + +case class FundingReferenceType(funderIdentifierType: Option[String], awardTitle: Option[String], awardUri: Option[String], funderName: Option[String], funderIdentifier: Option[String], awardNumber: Option[String]) {} + +case class DateType(date: Option[String], dateType: Option[String]) {} + +case class OAFRelations(relation:String, inverse:String, relType:String) + + +class DataciteModelConstants extends Serializable { + +} + +object DataciteModelConstants { + + val REL_TYPE_VALUE:String = "resultResult" + val DATE_RELATION_KEY = "RelationDate" + val DATACITE_FILTER_PATH = "/eu/dnetlib/dhp/datacite/datacite_filter" + val DOI_CLASS = "doi" + val SUBJ_CLASS = "keywords" + val DATACITE_NAME = "Datacite" + val dataInfo: DataInfo = dataciteDataInfo("0.9") + val DATACITE_COLLECTED_FROM: KeyValue = OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, DATACITE_NAME) + + val subRelTypeMapping: Map[String,OAFRelations] = Map( + ModelConstants.REFERENCES -> OAFRelations(ModelConstants.REFERENCES, ModelConstants.IS_REFERENCED_BY, ModelConstants.RELATIONSHIP), + ModelConstants.IS_REFERENCED_BY -> OAFRelations(ModelConstants.IS_REFERENCED_BY,ModelConstants.REFERENCES, ModelConstants.RELATIONSHIP), + + ModelConstants.IS_SUPPLEMENTED_BY -> OAFRelations(ModelConstants.IS_SUPPLEMENTED_BY,ModelConstants.IS_SUPPLEMENT_TO,ModelConstants.SUPPLEMENT), + ModelConstants.IS_SUPPLEMENT_TO -> OAFRelations(ModelConstants.IS_SUPPLEMENT_TO,ModelConstants.IS_SUPPLEMENTED_BY,ModelConstants.SUPPLEMENT), + + ModelConstants.HAS_PART -> OAFRelations(ModelConstants.HAS_PART,ModelConstants.IS_PART_OF, ModelConstants.PART), + ModelConstants.IS_PART_OF -> OAFRelations(ModelConstants.IS_PART_OF,ModelConstants.HAS_PART, ModelConstants.PART), + + ModelConstants.IS_VERSION_OF-> OAFRelations(ModelConstants.IS_VERSION_OF,ModelConstants.HAS_VERSION,ModelConstants.VERSION), + ModelConstants.HAS_VERSION-> OAFRelations(ModelConstants.HAS_VERSION,ModelConstants.IS_VERSION_OF,ModelConstants.VERSION), + + ModelConstants.IS_IDENTICAL_TO -> OAFRelations(ModelConstants.IS_IDENTICAL_TO,ModelConstants.IS_IDENTICAL_TO, ModelConstants.RELATIONSHIP), + + ModelConstants.IS_CONTINUED_BY -> OAFRelations(ModelConstants.IS_CONTINUED_BY,ModelConstants.CONTINUES, ModelConstants.RELATIONSHIP), + ModelConstants.CONTINUES -> OAFRelations(ModelConstants.CONTINUES,ModelConstants.IS_CONTINUED_BY, ModelConstants.RELATIONSHIP), + + ModelConstants.IS_NEW_VERSION_OF-> OAFRelations(ModelConstants.IS_NEW_VERSION_OF,ModelConstants.IS_PREVIOUS_VERSION_OF, ModelConstants.VERSION), + ModelConstants.IS_PREVIOUS_VERSION_OF ->OAFRelations(ModelConstants.IS_PREVIOUS_VERSION_OF,ModelConstants.IS_NEW_VERSION_OF, ModelConstants.VERSION), + + ModelConstants.IS_DOCUMENTED_BY -> OAFRelations(ModelConstants.IS_DOCUMENTED_BY,ModelConstants.DOCUMENTS, ModelConstants.RELATIONSHIP), + ModelConstants.DOCUMENTS -> OAFRelations(ModelConstants.DOCUMENTS,ModelConstants.IS_DOCUMENTED_BY, ModelConstants.RELATIONSHIP), + + ModelConstants.IS_SOURCE_OF -> OAFRelations(ModelConstants.IS_SOURCE_OF,ModelConstants.IS_DERIVED_FROM, ModelConstants.VERSION), + ModelConstants.IS_DERIVED_FROM -> OAFRelations(ModelConstants.IS_DERIVED_FROM,ModelConstants.IS_SOURCE_OF, ModelConstants.VERSION), + + ModelConstants.CITES -> OAFRelations(ModelConstants.CITES,ModelConstants.IS_CITED_BY, ModelConstants.CITATION), + ModelConstants.IS_CITED_BY -> OAFRelations(ModelConstants.IS_CITED_BY,ModelConstants.CITES, ModelConstants.CITATION), + + ModelConstants.IS_VARIANT_FORM_OF -> OAFRelations(ModelConstants.IS_VARIANT_FORM_OF,ModelConstants.IS_DERIVED_FROM, ModelConstants.VERSION), + ModelConstants.IS_OBSOLETED_BY -> OAFRelations(ModelConstants.IS_OBSOLETED_BY,ModelConstants.IS_NEW_VERSION_OF, ModelConstants.VERSION), + + ModelConstants.REVIEWS -> OAFRelations(ModelConstants.REVIEWS,ModelConstants.IS_REVIEWED_BY, ModelConstants.REVIEW), + ModelConstants.IS_REVIEWED_BY -> OAFRelations(ModelConstants.IS_REVIEWED_BY,ModelConstants.REVIEWS, ModelConstants.REVIEW), + + ModelConstants.DOCUMENTS -> OAFRelations(ModelConstants.DOCUMENTS,ModelConstants.IS_DOCUMENTED_BY, ModelConstants.RELATIONSHIP), + ModelConstants.IS_DOCUMENTED_BY -> OAFRelations(ModelConstants.IS_DOCUMENTED_BY,ModelConstants.DOCUMENTS, ModelConstants.RELATIONSHIP), + + ModelConstants.COMPILES -> OAFRelations(ModelConstants.COMPILES,ModelConstants.IS_COMPILED_BY, ModelConstants.RELATIONSHIP), + ModelConstants.IS_COMPILED_BY -> OAFRelations(ModelConstants.IS_COMPILED_BY,ModelConstants.COMPILES, ModelConstants.RELATIONSHIP) + ) + + + val datacite_filter: List[String] = { + val stream: InputStream = getClass.getResourceAsStream(DATACITE_FILTER_PATH) + require(stream!= null) + Source.fromInputStream(stream).getLines().toList + } + + + def dataciteDataInfo(trust: String): DataInfo = OafMapperUtils.dataInfo(false,null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, trust) + + val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH) + val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN) + + val funder_regex: List[(Pattern, String)] = List( + (Pattern.compile("(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda__h2020::"), + (Pattern.compile("(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda_______::") + + ) + + val Date_regex: List[Pattern] = List( + //Y-M-D + Pattern.compile("(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])", Pattern.MULTILINE), + //M-D-Y + Pattern.compile("((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d", Pattern.MULTILINE), + //D-M-Y + Pattern.compile("(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})", Pattern.MULTILINE), + //Y + Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE) + ) + + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index 1af72e8d3..6b4deef0a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup +import eu.dnetlib.dhp.datacite.DataciteModelConstants._ import eu.dnetlib.dhp.schema.action.AtomicAction import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} @@ -12,115 +13,30 @@ import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.parse -import java.nio.charset.CodingErrorAction import java.text.SimpleDateFormat import java.time.LocalDate import java.time.chrono.ThaiBuddhistDate import java.time.format.DateTimeFormatter -import java.util.regex.Pattern import java.util.{Date, Locale} import scala.collection.JavaConverters._ -import scala.io.{Codec, Source} -import scala.language.postfixOps -case class DataciteType(doi: String, timestamp: Long, isActive: Boolean, json: String) {} - -case class RelatedIdentifierType(relationType: String, relatedIdentifier: String, relatedIdentifierType: String) {} - -case class NameIdentifiersType(nameIdentifierScheme: Option[String], schemeUri: Option[String], nameIdentifier: Option[String]) {} - -case class CreatorType(nameType: Option[String], nameIdentifiers: Option[List[NameIdentifiersType]], name: Option[String], familyName: Option[String], givenName: Option[String], affiliation: Option[List[String]]) {} - -case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {} - -case class SubjectType(subject: Option[String], subjectScheme: Option[String]) {} - -case class DescriptionType(descriptionType: Option[String], description: Option[String]) {} - -case class FundingReferenceType(funderIdentifierType: Option[String], awardTitle: Option[String], awardUri: Option[String], funderName: Option[String], funderIdentifier: Option[String], awardNumber: Option[String]) {} - -case class DateType(date: Option[String], dateType: Option[String]) {} - -//case class HostedByMapType(openaire_id: String, datacite_name: String, official_name: String, similarity: Option[Float]) {} object DataciteToOAFTransformation { - val REL_TYPE_VALUE:String = "resultResult" - val DATE_RELATION_KEY = "RelationDate" - - val subRelTypeMapping: Map[String,(String,String)] = Map( - "References" ->("IsReferencedBy","relationship"), - "IsSupplementTo" ->("IsSupplementedBy","supplement"), - "IsPartOf" ->("HasPart","part"), - "HasPart" ->("IsPartOf","part"), - "IsVersionOf" ->("HasVersion","version"), - "HasVersion" ->("IsVersionOf","version"), - "IsIdenticalTo" ->("IsIdenticalTo","relationship"), - "IsPreviousVersionOf" ->("IsNewVersionOf","version"), - "IsContinuedBy" ->("Continues","relationship"), - "Continues" ->("IsContinuedBy","relationship"), - "IsNewVersionOf" ->("IsPreviousVersionOf","version"), - "IsSupplementedBy" ->("IsSupplementTo","supplement"), - "IsDocumentedBy" ->("Documents","relationship"), - "IsSourceOf" ->("IsDerivedFrom","relationship"), - "Cites" ->("IsCitedBy","citation"), - "IsCitedBy" ->("Cites","citation"), - "IsDerivedFrom" ->("IsSourceOf","relationship"), - "IsVariantFormOf" ->("IsDerivedFrom","version"), - "IsReferencedBy" ->("References","relationship"), - "IsObsoletedBy" ->("IsNewVersionOf","version"), - "Reviews" ->("IsReviewedBy","review"), - "Documents" ->("IsDocumentedBy","relationship"), - "IsCompiledBy" ->("Compiles","relationship"), - "Compiles" ->("IsCompiledBy","relationship"), - "IsReviewedBy" ->("Reviews","review") - ) - - implicit val codec: Codec = Codec("UTF-8") - codec.onMalformedInput(CodingErrorAction.REPLACE) - codec.onUnmappableCharacter(CodingErrorAction.REPLACE) - - val DOI_CLASS = "doi" - val SUBJ_CLASS = "keywords" - - - val j_filter: List[String] = { - val s = Source.fromInputStream(getClass.getResourceAsStream("datacite_filter")).mkString - s.lines.toList - } - val mapper = new ObjectMapper() - val dataInfo: DataInfo = generateDataInfo("0.9") - val DATACITE_COLLECTED_FROM: KeyValue = OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, "Datacite") - - - val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH) - val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN) - - val funder_regex: List[(Pattern, String)] = List( - (Pattern.compile("(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda__h2020::"), - (Pattern.compile("(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda_______::") - - ) - - val Date_regex: List[Pattern] = List( - //Y-M-D - Pattern.compile("(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])", Pattern.MULTILINE), - //M-D-Y - Pattern.compile("((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d", Pattern.MULTILINE), - //D-M-Y - Pattern.compile("(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})", Pattern.MULTILINE), - //Y - Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE) - ) - - - def filter_json(json: String): Boolean = { - j_filter.exists(f => json.contains(f)) + /** + * This method should skip record if json contains invalid text + * defined in gile datacite_filter + * @param json + * @return True if the record should be skipped + */ + def skip_record(json: String): Boolean = { + datacite_filter.exists(f => json.contains(f)) } + @deprecated("this method will be removed", "dhp") def toActionSet(item: Oaf): (String, String) = { val mapper = new ObjectMapper() @@ -200,6 +116,8 @@ object DataciteToOAFTransformation { case _: Throwable => "" } } + + def getTypeQualifier(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup): (Qualifier, Qualifier) = { if (resourceType != null && resourceType.nonEmpty) { val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType) @@ -318,11 +236,7 @@ object DataciteToOAFTransformation { val p = match_pattern.get._2 val grantId = m.matcher(awardUri).replaceAll("$2") val targetId = s"$p${DHPUtils.md5(grantId)}" - List( - generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo) -// REMOVED INVERSE RELATION since there is a specific method that should generate later -// generateRelation(targetId, sourceId, "produces", DATACITE_COLLECTED_FROM, dataInfo) - ) + List( generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo) ) } else List() @@ -331,7 +245,7 @@ object DataciteToOAFTransformation { def generateOAF(input: String, ts: Long, dateOfCollection: Long, vocabularies: VocabularyGroup, exportLinks: Boolean): List[Oaf] = { - if (filter_json(input)) + if (skip_record(input)) return List() implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -565,7 +479,7 @@ object DataciteToOAFTransformation { rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) rel.setDataInfo(dataInfo) - val subRelType = subRelTypeMapping(r.relationType)._2 + val subRelType = subRelTypeMapping(r.relationType).relType rel.setRelType(REL_TYPE_VALUE) rel.setSubRelType(subRelType) rel.setRelClass(r.relationType) @@ -579,18 +493,9 @@ object DataciteToOAFTransformation { rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) rel.getCollectedfrom.asScala.map(c => c.getValue).toList rel - }).toList + }) } - def generateDataInfo(trust: String): DataInfo = { - val di = new DataInfo - di.setDeletedbyinference(false) - di.setInferred(false) - di.setInvisible(false) - di.setTrust(trust) - di.setProvenanceaction(ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER) - di - } def generateDSId(input: String): String = { val b = StringUtils.substringBefore(input, "::") diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala index a63627d1c..e1607ee9c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala @@ -1,64 +1,94 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.application.AbstractScalaApplication import eu.dnetlib.dhp.collection.CollectionUtils.fixRelations -import eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH -import eu.dnetlib.dhp.common.Constants.MDSTORE_SIZE_PATH +import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.mdstore.{MDStoreVersion, MetadataRecord} import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.utils.DHPUtils.writeHdfsFile import eu.dnetlib.dhp.utils.ISLookupClientFactory -import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} -import scala.io.Source -object GenerateDataciteDatasetSpark { +class GenerateDataciteDatasetSpark (propertyPath:String, args:Array[String], log:Logger) extends AbstractScalaApplication(propertyPath, args, log:Logger) { + /** + * Here all the spark applications runs this method + * where the whole logic of the spark node is defined + */ + override def run(): Unit = { - val log: Logger = LoggerFactory.getLogger(GenerateDataciteDatasetSpark.getClass) - - def main(args: Array[String]): Unit = { - val conf = new SparkConf - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/datacite/generate_dataset_params.json")).mkString) - parser.parseArgument(args) - val master = parser.get("master") val sourcePath = parser.get("sourcePath") + log.info(s"SourcePath is '$sourcePath'") val exportLinks = "true".equalsIgnoreCase(parser.get("exportLinks")) + log.info(s"exportLinks is '$exportLinks'") val isLookupUrl: String = parser.get("isLookupUrl") log.info("isLookupUrl: {}", isLookupUrl) val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) - val spark: SparkSession = SparkSession.builder().config(conf) - .appName(GenerateDataciteDatasetSpark.getClass.getSimpleName) - .master(master) - .getOrCreate() + require(vocabularies != null) + val mdstoreOutputVersion = parser.get("mdstoreOutputVersion") + log.info(s"mdstoreOutputVersion is '$mdstoreOutputVersion'") + + val mapper = new ObjectMapper() + val cleanedMdStoreVersion = mapper.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) + val outputBasePath = cleanedMdStoreVersion.getHdfsPath + log.info(s"outputBasePath is '$outputBasePath'") + val targetPath = s"$outputBasePath/$MDSTORE_DATA_PATH" + log.info(s"targetPath is '$targetPath'") + + generateDataciteDataset(sourcePath, exportLinks, vocabularies, targetPath, spark) + + reportTotalSize(targetPath, outputBasePath) + } + + + /** + * For working with MDStore we need to store in a file on hdfs the size of + * the current dataset + * @param targetPath + * @param outputBasePath + */ + def reportTotalSize( targetPath: String, outputBasePath: String ):Unit = { + val total_items = spark.read.load(targetPath).count() + writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$total_items", outputBasePath + MDSTORE_SIZE_PATH) + } + + /** + * Generate the transformed and cleaned OAF Dataset from the native one + + * @param sourcePath sourcePath of the native Dataset in format JSON/Datacite + * @param exportLinks If true it generates unresolved links + * @param vocabularies vocabularies for cleaning + * @param targetPath the targetPath of the result Dataset + */ + def generateDataciteDataset(sourcePath: String, exportLinks: Boolean, vocabularies: VocabularyGroup, targetPath: String, spark:SparkSession):Unit = { + require(spark!= null) import spark.implicits._ implicit val mrEncoder: Encoder[MetadataRecord] = Encoders.kryo[MetadataRecord] implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - - val mdstoreOutputVersion = parser.get("mdstoreOutputVersion") - val mapper = new ObjectMapper() - val cleanedMdStoreVersion = mapper.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) - val outputBasePath = cleanedMdStoreVersion.getHdfsPath - - log.info("outputBasePath: {}", outputBasePath) - val targetPath = s"$outputBasePath/$MDSTORE_DATA_PATH" - spark.read.load(sourcePath).as[DataciteType] .filter(d => d.isActive) .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks)) .filter(d => d != null) .flatMap(i => fixRelations(i)).filter(i => i != null) .write.mode(SaveMode.Overwrite).save(targetPath) + } - val total_items = spark.read.load(targetPath).as[Oaf].count() - writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$total_items", outputBasePath + MDSTORE_SIZE_PATH) +} + + +object GenerateDataciteDatasetSpark { + + val log: Logger = LoggerFactory.getLogger(GenerateDataciteDatasetSpark.getClass) + + def main(args: Array[String]): Unit = { + new GenerateDataciteDatasetSpark("/eu/dnetlib/dhp/datacite/generate_dataset_params.json", args, log).initialize().run() } } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/log4j.properties b/dhp-workflows/dhp-aggregation/src/main/resources/log4j.properties index 63cba917e..81458d1f7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/log4j.properties +++ b/dhp-workflows/dhp-aggregation/src/main/resources/log4j.properties @@ -7,3 +7,6 @@ log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. log4j.appender.A1.layout=org.apache.log4j.PatternLayout log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n + +log4j.logger.org.apache.spark=FATAL +log4j.logger.org.spark_project=FATAL diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala index c7c6c6a92..50fe73d9a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala @@ -4,21 +4,29 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest import eu.dnetlib.dhp.schema.oaf.Oaf +import org.apache.spark.SparkConf +import org.apache.spark.sql.functions.{col, count} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.junit.jupiter.api.extension.ExtendWith import org.junit.jupiter.api.{BeforeEach, Test} import org.mockito.junit.jupiter.MockitoExtension +import org.slf4j.{Logger, LoggerFactory} +import java.nio.file.{Files, Path} import java.text.SimpleDateFormat import java.util.Locale import scala.io.Source - +import org.junit.jupiter.api.Assertions._ @ExtendWith(Array(classOf[MockitoExtension])) class DataciteToOAFTest extends AbstractVocabularyTest{ + var workingDir:Path= null + val log: Logger = LoggerFactory.getLogger(getClass) @BeforeEach def setUp() :Unit = { + workingDir= Files.createTempDirectory(getClass.getSimpleName) super.setUpVocabulary() } @@ -31,6 +39,51 @@ class DataciteToOAFTest extends AbstractVocabularyTest{ println(dt.getTime) + } + + + @Test + def testConvert(): Unit = { + + + val path = getClass.getResource("/eu/dnetlib/dhp/actionmanager/datacite/dataset").getPath + + val conf = new SparkConf() + val spark:SparkSession = SparkSession.builder().config(conf) + .appName(getClass.getSimpleName) + .master("local[*]") + .getOrCreate() + + + + implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] + val instance = new GenerateDataciteDatasetSpark(null, null, log) + val targetPath = s"$workingDir/result" + + instance.generateDataciteDataset(path, exportLinks = true, vocabularies,targetPath, spark) + + import spark.implicits._ + + val nativeSize =spark.read.load(path).count() + + + assertEquals(100, nativeSize) + + val result:Dataset[Oaf] = spark.read.load(targetPath).as[Oaf] + + + result.map(s => s.getClass.getSimpleName).groupBy(col("value").alias("class")).agg(count("value").alias("Total")).show(false) + + val t = spark.read.load(targetPath).count() + + assertTrue(t >0) + + + spark.stop() + + + + } @@ -38,8 +91,6 @@ class DataciteToOAFTest extends AbstractVocabularyTest{ def testMapping() :Unit = { val record =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/record.json")).mkString - - val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT) val res:List[Oaf] =DataciteToOAFTransformation.generateOAF(record, 0L,0L, vocabularies, true ) From 028a8acad8b392673c9c94e49f15dca1c51b279a Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 25 Nov 2021 10:54:47 +0100 Subject: [PATCH 047/176] add test resources --- .../datacite/dataset/part-00000.parquet | Bin 0 -> 132914 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/dataset/part-00000.parquet diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/dataset/part-00000.parquet b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/dataset/part-00000.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ee59c325ea304b3ba8511b196a9c180faba78de7 GIT binary patch literal 132914 zcmb5Wd0-QD-aq~sNa&$R?;sJP2|U#z-o{l267Jl}o3zvrJHj_G7F z^Euz2_wjnaKGPNM=1s`-RITYPTdnDN0?wzHNUOPw=U9J`=iIT8q=0c-C0?RXI%j=#|MNp!^rFp26)jg>SnCs1up33grLvmRBG53~Q-V^Wz{XEMB2J-y;#(*DYD!o3-#T9piOU0Ck?%*8W5C6G&7w>WTf?jWh z=1~}_|99H#FID~9thtUghY7|T@WJSg@TI!eTA$bFb#F|rk4j0aZt5xzlsKHqXOdE! zO{bDF3o{-NMW3s;7s~d#v(OPQ=i;DF4#vbK>dJe4KHd*Q8jG^gY>bUY`FM6T>v5^; z_aJkS^Lm0_kM3~Con|STlM+%c?6iU~1zFDL)g7+#2{4{K7w`rAWtTO*O{U{9f@5HI zpzoZI(Y#onM#d=X5jlVO9n}cmcHU&8=yQ9ch%Gt@VV0e39Tdn5ip)Gr4Dg|Yoj z=W3|g?-N9xHO#rw57HxLbCO%~_@WUxCItmv_68+C7v()(*)PTXGAD@wXF!R&NYUe# zw&;DL!23<6Do%h^kYl0OzQvCQJf0v(XqM+WkrO?F>yi4a5W{(vz$j1Tt2V3u`1}V<~WfTT%KB;?`F;~xMf}veBPkP<@E?Lmsbb` zU6K%!T!I+jA`y8Y;PrCl8jh`1aoJp2&ahsV<2j!9WvU+_hb5` zN&#WOD~Mtu3c?%^0&b6Y*y~y!5&gQMMwlnwJtikpu@oB+pbqc)d#pae>mFN|QNkG` ztbIWYy0*53CY$*H@A3G|nn$~!sR5WDWiYPfSPuuI11dtd!^`t9>lxlHiXc^i#qU~S zSq9val&P$NxffXv%X@sGKcZSO%16B3s2~i8e$#CgK`%^Ya!`^b^^3@VB~`h_m<8>@ zxyDabRB>E2jEz@t-RG%)da^zp%X#?~%aAs}3FW%C?XL&2pn_nv`Gr7LFj&ov2UISu zd70WMctHMqhVQ-p0L*?=O0Xc&fuLy-Jn9zgPm`WN(5@7O((3`C^6P$V5kPl&-O?Z{ zM^i~w05#y}FRE{tWaT})A0*zJD1wFJ6$Ll62xd3vx}i$|6~(!SKq0WnTwY>%(JKf# zD-4p~?}p+)9|S~CCw|+kdj|bh+A-5OCO>{Zg^Cp=+VvzT7!r>XcFvhTOGaGV4 zE@;7sOMn)6Ji0$3@mGn8VAL0h`3IstuH!NxG)eFEabmFEiz=i*(8CL!NHC!Li&@}d zn%z8czl!tdKurWaAd$JDF_(1UVWUU%fLOO3MP@HNU6|TO=YaC_i|(O}#KJ(q>g0XG zH6Z_>t6{#lsU7+`epnUmvBbul$0KYU9GIb-7M@i#%x0rXE|rs$Bh`O4a(>q9^|~s0 z|99)tb`V)Tg6Iu+MXXy<5%7CB_dq5Q^cc2-JrO}y_{5;MKMlgv)_}~QJ^U~fy1!}* z+V<0bs6^rTS1z#^nhJeOrR5~lEhnQg43NjG+3=JHN(OC}il$)RK>2wB1Af^BHwRo$ zWY87#$2=}cjQ9l~7xe~Wflsx9fiwnTJUju)KpJL@zYMx1;0k(Or`K9xPI*_bUs&%` z|C#Z?`u9Q8#D^xG(P@XJXw<$8i3e7we8Dfb2pMDqT?@jY&eXR}st_Z=nCJ}(UXN>U z{Zy59EE*gzUBdZA-sAR1Gb667M>Hjt!RY(l>k>*X&gO>Xg30u=AZxB}xn~Zn5g2gK zMljE?)ct|Bn+Y?B4)5^?2G**LVAP<~!sJPMek3yz1Izz=t?6OSzk31XO&=wtf+aVT zr5|nUEu8&q*zcIq@-M@FY~Qc@3zzSjSh0p6wfh#oypkZv-%e>&>&e0TxoU>|inOaq z@=q-hwV6zu(ZTin37eYKUN`I7wM2mYc-cbu**}|v&#EgHzyq(S7OMl~fvV&1z>ez(CUc-r>cZZ${Jp;CBY{`v;ENg4Qux{Cbh#~7UNf4mv4&%2=o3VQg$ zC)LZywy}1#Ue!CUxtUyl+jZ(H?FBzBczCv~3Hcb`jcq9(3JF+~aW>k;dr)eQ6W}(`w%0CCc9Xxt(<#noC zU#-4V^`lqQqPq4WXw&5(%~Ry*5vO`BIe+DLb(det9T+$X=fBS$f*SMF3ez$FRWo$)^}96hk+02sRs9~heb%^|Q$5K; z^Ok>8{RR2dhY!GmI}J(o5%QL_MLk3=xMaDyMYScS36Q0??tx)4U-(;S-F5els~;wd z?i^6hCvVpN1ZAJ|&4sde2-RPx=8d&JJSXc;wGm4ENUQ!Ri@g1mqLOt6Tirv zvHf3S_m7``diUPX-`(==Pd|NL3Qz2Q+HLyiOzFQp1tsn{J?+hZ-9NVmp-qmA9G7xR zDw!Qp(pg7nlLNd>IhiY*#iX26_?sI|%Z`vkQ+9FIQR`4*a4u(av6RAsSL)cjd2_8p z%H=XjB%hPBMDzSKnE!@UzEB8P>i|`&Dd$odD17B6M^Z}2y_%&hU2-Oy0*?s_90a#B z*(oK6q;a8~&5k;R^N@}=H>8p|aDOxb2JRV<5=wkj=Wh&0z}HN!OO3!qX<$H!E9z4L zD0%hf)wPZhC=|L;%}X4cYjqag7P2QFQF~R6S`&{P z#0qmv&PFpzx^kiVgK~rJyozuno6A509JOpY`l1<1S<>0s+r*?ZsSyQkW+Wz`R0b3| z#%z#AnSoS>iOF#V)Q*xItYzSqWKJ0v{rO8Yl}NzNQ7Nakevy!p@JK4N4t`=bD7hh~ zu`}$7NLeVUCoO~S%`z>c=@jU$ESy&Lv{bW9I38yP@?gf}Olxlsla<%!3o1IxC`krx zgg-eMJkY{S!Y#>sG%lxd&;}-}tI;!snIz<`(B_QHMC17^+zhqlQjC;_{%lmh$CTrv z46z7hbr)OX@`xN~BBR7i6enKZ2tCcpIVLs0#71HMlqeHZz&C{InGq?TCyrFj;aDHz zV!*4a)EzT)s)j~QuM-zGwN|&(LOW|2xH}S;neYf=bZ=;3s1iiNC^3zTT5Bsqwa|Vg z!{nrmsbq@SYVVu_4njON2*VX;q+~LcYwFaTC1G-_Z#U+1^?vm5Iq2C^Q#ktL+I@PHM8Ml%)OFs%TU? zuz0bq4`uBZBO}2CWnJ%~EcnMgEihqFZa$ZSfwdc}U}!-+VB+%3a)uVDeU%YGJL)O^;krHI&Pxvmv(| zL<%C!e2$f6H$0olg3FK@Wz$3HYDRLB`K+w|P!Gb9jgCKOpN*KrOk-{oM5R{!PL;e- zN~Gg3H}RDHP1D`xEJ$t~Mq#!Q!Y>G7iQOECX;R*pa}CN#Ia4`d;3c_50`VHnLZj14 z8l*ibXIbK9D+AF>PEIgH9dWtjXxc~S)3M4KVk4oFvTCO>nNLLIO!!yEbUqSSqJ7s+ zHV$S|8?x+1lIPkl45QDiLh7K*<9Ja`$9<-FJ^YrsAiG-YqDzMC}8XB1T<*v?#>dGVfYys(5 zev3Lsf{cg%14YN1iMXbMajydvC&yjh(3`^3sY*!+|BnbR1N{z~MgQOgETKw`sYyzIBmX#2Wlh4LorH8T`8{s+=_~ zKY4m3l6Gw89MIgHd!uWh5QO8s8Zw=Z>;9l}4~Jc7HSw}>Lo%L{VrBE2%(vGRYnNma z627MR^r`vK#f6_g`8XW$VJQw~$WSg+7PB)EaZRYI7}fsf=3?E5V`H(Zma#ojEM$?1 zIh96L-7M20Co{@AN39{pbSm*I%y})tT9Ghq!J0c5ehd=KlwhP?*^qN<7>722gaC` zfS*m%#34kKzyAf&W^(yV`MYF%L?%w_=)z^|sUgI)fo4n(G0kjwoJ=GX{00dv2YW1L zk#6Rx5mXyynWnfhCPg3$D<6gWQ*kg0AsZ6Mkh;)3b4O}ON+cv=zUyggy#$e1qLwMQ z2x+7#bY0s9KZn0IB(!pJFelX*U0qT6~fyJf|G zq3dDPh-VvAL(=hjjTweYS11xS;Oh*;Aw$xf2RjI5n#^MIU$u2enL+%BPDmjnSN9)x zu?z}rKqe>j4P+;qXa9`AdD) z-c&>Z%emkkuc|pyz1)}uuL0)9jzXiTY}TnQsG>(1RI&r$XrxB%%mOJ>aDZwVp&SXD zewjVgra7h^%gO`g8}xD_{&Qc&JW|OC%#tVLq`3ohPbwkb_(5R7P%0wDT%GdpkOUK4 zZioBWS?JUki0oC8(pch7W&7_=~xrpXDc% zN1B5C=$pZaZ4AC>ZUJjpmtR4gmGJwn zqpF2N_(3BY3fJh$V7N6iP9+fuPqKeP`qWS=pHsg{(4z87k*aVKKaLH5)TiR{R0994 zgfN8$5HbDK03w^>amrMx8$#{2lUf+%b^ofVHKQa)@thJ=N+9BAA?nG= zBLY+;OCw4S&(avm#Kgav0IOB1F}#Ts{LwU(q)?h--E?SQSz%6{20Z#QYRA7bl0)&@ z8Wz_XEk7?NrcsWhGSU#!cH=|l_Ec;T21J;S#Ls?_zSxE|S9n0%J(^lKx52mwJWH5BlQ8_taKi*jnc*K* zt^3KevhMU05ctF|0Mj>ksC?!G)HGu3&V#JRSNu!*kC(9L>s8&O^n=)B7T$)0LP1Zo3babw1O3No z&5jxY1kiFShRB6-f_l(#_+fpIl#DC*pVKsSXb`EAar>W7TClba;$0mB{4meLb`%zYg=R`7Jj+o3S%$m{KAZb#zl$uZ_$zA=AN2liM3Z6ji)xqY5|c&?AnVV{1#ITevhvXH3|Vs_F3ZY8m?KSAF@sD$8`B<8rZR3e=yJwB z2z4TPyw7N|Kqn0aY0-Bm(IJVQW;HLBl`PoFGBc>|pACn(VJHL6m%{d0^h@)ZvB-f# zB=m*_3CSZ_ElejlETXzdKlr%sJ)jT1n(f9p?YUq8OMME zTCa;Lap)O`C%_T2v?Hl_9uLgcG7hgm=v8qE+$2Hb=L8> z%8~u3?UPE){F+mh>KoYaoFhF5rSZ90^xrFqY84kGzQE5`HhJZa%625!vw&fL<6T0(JhA^v?l;x#bXk%-( z3pD?t0ir`~WxdG$rC^>^??9#pJ9@iUR>Dv=fc=xB#76vDC3HAri8}eG=sqVcJIB=; zJv9JbaJ*l!Y{P~PtQ^nE5bR`FKv3MdSi+spVsZu*cWT{`?9%kpw^cgcsM0iS15b^S zN92RMxxrAPESH-Cwr` zjqOE-I@FbTU7KNA8(Rlo`J3e{=MJaCJFD7(Vxno5dZu&QEaGqYbJ7%bH1M!68hPkt zpOVb4OF>|5cM*b}CKseeuy>kXk-$w7?1*h75mP>km@7Pn3k!WSs2b-i#NY!4%WwJ6 zg-G8sBq{4^vNI85+NJH3@>%K~Yx7JbUV?Ph=PpLI3qf5C0ce!ZI>y>8sC3z~I1z%2C5LgH0O=Q?)<(uUW`ar_KZ zcH#VPDK;cA-H;jupJhY=4LMWXibB<>`(iLS2P?xD(D`E9Pnx|~O-4EA1;t*-xC9D0 zUBAb_S*QMNYq4c(u^#f~nZX~j#bBKe3YKL8*O*FWLQHRF6dboKwYC@&TwsA*2yd9B zkiG^VHf?Z$?~s%R@f1wPnJ!D1+cmRhu6M^g=CY*`c$4CzmACFYi0J zOsxz3y-3B2;@^w9{#RlFaqW+vQ%M#vkL31zq{vtcI z_s>P@_RgD8Sv&u?Gk>(-9okqV*P@gmd)#3VBC#Se98>W5)yK|L_2lr%V!YpPnl5_O z20LgyyEBH2->OV864;xT);>yyxD@&~%vXsqdAVMeyhiU+(Kbjhn zssHd2rKky?E-_~S&|4~vSkgXypOVc>Sg6I{mk<>P>Y2rPa5wS^R|BQ;$ZP67aNdK;zO;Eu%-KZ%boMILdwR4|`tZO9mo*V3_Njp8aDLic zYQKk|*Z=hu6@IUnFl$&9AEd&*ia;+1kWnBch!f4F*Eg36wn_C@O)))qdvhuAhPtKZ zccnE)#N*xgZ>4p=FQxC$Gy*9BGQd7aph7Mgl3zYw)6seX8Lm4x@L2{`0cjyJ0GES) zfj&k}tV3JYAxJ`U0nyiFg0GQvU={u^!6e2CkSX`kyUB_5D6$^e7tAkY`#dfY()gUu zBLrOj?vIIyV+4PU_yElHNFo&@&gxmekM-jd0esvL>Zc|$h|M4(?)@*y3eZphPNVuX z6U&H!Wd!F5xB=jXx#u8rJmH&ki*J(VJw2pG1A&PX#Lg20+e?r^j-@81A%0@cq-}F1 z=|-#)>+=B@!3#|rS&yU{I|fKdM^6l>t{G72A@l4B;(c=_43n=hOhyqIlfcZNyXgu2 zVj~!eKa*1E1RbH<0RLy(XQf}DtpS16&t!pl^mbe8Fgc01L71tssrM28qTxPKE z?EEcFbS2`=M^M?k`u?J_>BI^Z5Ia67GaEcRAErsZmZ9z~sb60bT2gA;SHfPXdr>L1 zH1jn|DdwBBkfj-8$z6_uLuCy0r(C6%~woRn8qqhDDC`f zD}B)1cE=3Mjpgi51VD5z+B!Z}Yl*Ka!m${jep!I>sjo`4ivX9$hg)rv^k2;JaS;56 zJ3CwZn!0S%2Uw=+$h3tBsynz~y%7t}IK zG@`1tS1mG4jjf&aOS%`d+!g3dm`()s!ZuUnNGS>EGky|R_TOT(3p_ZpjK)qPajF%yEv zW@hq%2Wd?cri?^MIDlU+HB7(q3EOW=rJ?$9vT0Ofp&w}EbM$=Nx zF*xy(2@cCkrn=9kP%Di1c@wZner9*siX1jyBVtZIZ>nF6kRmQ%?29LUwBG)smF!Lp zzF@rXDANPj5P;keMOnlL)H)`-u=$8ude{D1czyE?>b@>Qyjl%3oB$9Sc;Dvfn$PYq zS?(}Zj71W1jE!XzV||ZXq#T=3OP^3Twp&io_Dd5ZfXp1cEPRaZskC;cqJSzXDQbVG zDx0P@w>7_30T6ntdi}-DZ#o4Y*_*A9;b1^q2B{(%?^|GI0L4iq(x+FUEd5ivF_i%D zC4rSnyt9LZQ~WiF6rC1j@ri}>j)f?O=XMz30R@oKRqe*)hypeMucuW~95{9{%1Ohm zLkqPVAc`Z%szeH$&pfzxOhnVAdu`iyYP88Vk0zcm_dw*KBwhG8qle^}lBB;Usb6<6 z5%2~WpvUFtcc*I$xjWp`t_3lvzix90mJYpK)#fAG4VVJuz2{K^MQqt_9>A(BA!0kK85kSgkUe{ z4+;26>uWbmv`-mppMv;-Rh7CAsP9XTM;5=hvwI5uX(rv#Q|1M43f!MmK5Mx~H_TV! zsLA?YWWKNsRP=vy`FLIk0uyTYJ6L*sWEg|^yf$U58ZSS)I3Dh@xG+6wA^t?)dcECN z^GSsx4E<<(&&qyMFRXL0wX^3YQ>68 z1ZH%IRsUF{#yiW4T+bf7nPFqH8?rX?MtrENVjg6ta0@YQe4Z8n&jh}*R@=@tu=el7r9~PWVXo^ds+(P* zo=@>+Vnuisv4$NhC(OgU0C5CSlb8l66FJ_5YN-d#t2bhy0`0jLsoL1Deud0ksTf4T z8}K1N3b)N5Of&Xe0uaTyOAxk9*Ft_S*=HdPwmrL$-MG`J>3mtW{6uJmrUIBd98lIa zUqXI1Lt~kt0dM4lt?q^pPHO3amF|Byc%<`s-nQX{nh zxcd)CO^jMDuCe{D*z&q->^ZYw-=6gk*sipKOEQkP z)1=GG;rqMs0heVqi+K9)ddK4xXaL_>g(9?THsz#nQ4atBHLMomI}g6F9C2;ul}>zY z`{84o%Ht~A@4Q?Sd5Jtei9Nrh?pP@p25P$7^krEOO27OOM2Jgqaj*SJe3-O6QbLp% zrc+BRD<30h-!1kFnq90}gp8Pr=dsNRgsKqFpP9!7(1eTabFqSDQvJQ9;H3P;c6{Xo zsrfyrNpw!|)8I(*(@-mYyVlAqSqPXJAO-!X)?#(m&D4;Pk;R|ja2rTB`~MJAcg?7w zHI|2|HPfdRtnO}i6(p%+11Z3>k}`Ff8{4O8rAXGjMnd}N6jGdPb6{Bx=_i$+=AXL z6Ehmk4Eq~(8BJ!jh7woPE&UARSPp5yF>pvq6>1;JZDL zx|H!f{q(U}+AO3#E$`~Osualh z#T0sQEt0652H@$HA$(ZV2>C$ZI8-$*YFNeyUS?gSnqI3p`msUZ0e44nRfpVQX{eGe z#VISbmlA+E-Gi_7%l8o;L)XH)J|XdRodo7Jt3;7TrI7+4yhJ(OQCYV zUJSxN+!L^UwN`guRh9up0w97}c>_LHO|M!@{bTK#m1yE?^w!sqzEOWkJy`9Ujk3Zm z$bK(mYBokxsdV@&dVY`uQiC{?Ae_1%j7(Pwa-<#4;rFQ*Z=0(1xdadH?pJxlzIOuG z-!}ErHP_rawGYxh$KSejY6b7@<%1#c@x9I+#gR9Q$6pd(EUWulv1OAH90!yRy;e-E z(&F=r&n#+wH9()LB_+G5oEJiNK-h}`*uU01m)7xHl;J^5(mvsZ5_v26c)!$OV zwbBmiWWS!7%``Mj+#K9;a}ZaWXsN`=Femzpq+8 zA1^=8a$881bvwsN)Q`tWV?W=Y8t50cl7xYjlKnMX;X1XIRQ2a9g;U#BlI%}s99v0! zzto?RY#ZR>_#f!PA7|l%f)97ceh&Cr`X*z5P8a3gbpy zSa0J>%u7@80Z57CWvzIR$;60aiQq)UypJlh_{(;@uibgoOm7%~g=8w5OQlo!c*!?dw0B^q&QlOE$&|b)DwB!e$yMS;V=mYqF zV*xjZqyh=2-ElP1p&P56jPES5%xNs(C_%3uIJyGA`RbyHr8+pCUaA8GCD>@q0KW+` zycs-qm;|8rI8QxpqkFY=XSLxu+lI76<@oQOSX~?dED_iSfN}sJ0tlzAUYU3)h+hg0 zEUbWpwDX@ciKpILRm=$fUf^^HTxicX!HHGHmQ}@6zXpGxdD%deNRUIMk_uwf`t^U< zIiay1A4wY9rfNX#Z=*DN;DNaF2d%70L%QmhwhO z+RE|aa{BD-iO0&{f2@4lmn8Dp?{HjQU%x|T{AK&XnV*I~BkhmWoF)rY1c#wSc!d=H zF`4X-r`G=iWaw%ea+k-Q7SbM3ubrUC+bI$?IJvez#lXLgVIV9+`s|#Vu-m_?`}-7X zny$`-%9nG?<$;{U^Z;EhHY^<~Dgxw!OM_FGvAlA1A)TI)VvtEFw~7ZL`5w6w5pC=5 zi;NfHk0SVbiD)MH@Yni!P#^0Ys;I#xZbpU*A^>7g=_ER~Z2=`jJzA`8A~<5RcA*lF z;A2fBr1HuOcbY1?w;*Dx{0dOLV$>fN)~pOC>B*t%iWuNf1J~NO`6cReqit$?%m1=% zgKE&~vbqd%i2ux^m~;&m%#k`II49$)>m~VVcdh6LsCFxx9nkA=3?|jt}BCecTHV_b5?-16>{9jpN>4 zZ8(`rIS%%XT!~Dxg)0$oXii*?jE&&sJ##Z&F+>7I;2JZbs}Xi0&4T?G0Exx&{fH{- zu-`**Rdb-c?63>`P8aV28k!3~>xcn+yr1{OZV`c7CcxGN9$3gh=KZZ#F-|k6f;fXM zBa~|fKAu%I=W&N0Yc`-^e2501j`Ev8pX0GA>Qojt&(cO=z2f-|WEzroA7&w9+1fD_ z1(F&hyBYXNP5~AUIh$O8G6nr1Two$Kr$u^hDQ&MXI(otjni$Ii)3Ig|evY*4&|r5K zoa`$pcBoV@K+IKZ6HL{}G0)WEfQEv1&n$uhQ)#JS8HpYpV=JYR1CN-(i*;roGE!s@ zWHessz$a*GkVds<-=#}w>Mxck-Lp)^zVDA0X z+8!$yz0Q5e*aGoQZpeAoG{F;a@O;~KXu?fYyAdyi&0WA0NJvrYC@q)j3@SNKUAbZR z1JhPWgoPb(&a6FfEuL1=`d6YDT2?ut5_lY#huZ>b<7MZZVZ6Zq~-|?HF?@|W7J*eQ@0dbZk_4~RXM9V@J5a8*{o$B<#cu^tLbo6b43UKFo&{^ zkFzRA$o9sJ&@3ZzbWej1iZ-Inge~;Roa#1ejmhzVslJNVFzgl+?%kl`*aM4^`|x74 z@u9`)yCJvfx{9u#@rNZSPF*ow2Og*Bpx2~O+oMHW;t0rAwtvpBu5^e#pXe1G%|@qr zM!3&1F)dRw0u{IR78p+yl|QaUVdsx|Ez_Aws?C;(ydI)DrWJoV9VOMDP}i@dy*g^q z^n>$vUq~aDB|8G_N?>tDK|MSjr3Q45Uqmd>X4A2a)ZXcqtwEG^9vpGbC5<4q(Wr}h zegwHF$)(#{=6EQq?dfV<_S}APz8-&TMzUk+7QB^o^q7A<53Kw3n~Nr*MZ2R#BrrEE zcW4k(|D=YV41a}h$Q{q1HvB69^XZ-B>BG2xs!B@lIH_gAu@O9Z3w7c7+KzlSiu;UO zz}RIwK{hH`>^Ofs6m)j{#j%kKUyBCChA|kG&c`goG^zwIaZ!IS?$cvSYB9Z?#^<(b z!HIK+E+SCB^gy#pgZkO$;M|5Z`hK8e_R~o7voS>)JZlsOM6 zTaEn%K5V}Q!Q*1#1OJ^QTojzSZO@codK&d9X}jNW*9zOCF3UYE%meiX7qu;UCJY=PoQOIHWWEOHbKc3~7XVC1f;jD=3bZo5anTn&^>Mz;|h|&!K2zGXC z0>wMQL4-B=0_wJHI)meCE75BUb-eR(MK7eg0E_o}R~*y}ub%(Pmxc=DBPyuoV!+Mu zJ+n0LIHbO|KBNk)crAo~Djv9cJ9R*}Z1_z3m%c529>+PsuPAvHS(B9{NL z;|=ehccv4c8KXjY;CWr0AMHJ^t7?Ei61?WvEICJi0>O{W3;;Cp%pH=Fl~*NUmY%r_ zUo%d80_X;wx3v}+y})M$b}nES^s^2y{^?XYAID$YD}Xu-i8v)&?Wliyd*3l#-`!e$ zn=B<=)Cbdc-a;$l$vx#H28bFRf@~m z^O$xCJBC%c6u#D>jm?6Yduujms#`U{6C{48kIjN~tj=bZnfNRP{G8oeh)swg3_u3~ zw1xUXt9y?;k7?CGrilKn!wL-IsFIE=^bd}@C{o4p)IL%xC(CvC+7qM`K{t^l54~kq6#Ur7}3^ zM2>@hOuW&ZAn}&-k^Jpi9sibVN-U;87$YYoRt8FKmL15zrWRl^XOeE>Yi~broU?%8 zf=w6zQ20+V`we3ks1e6kK6K@-FnvQY-hL5&M(?B?6RXv5oLj9%9{iHo3OEn&Pkn&{ zLAKO&cs5&sP>`rte-y>|_v%)wLjqbH-o09_SCWuduKs=BN;NtttW?+j`(8)t%0*?q zk=5$XAllon*49H@M!(E!JHU)MK9($m#;G<5WmCcaO`;47&n;J*S2-F%HWuPd9x}-! z=>0TZDI9^2e)#3Fe^voSU zXC(0QVxXMzi}m3VNr_YYIP5eKWt6_OZWg-z5ll~Bg&ek5du#_W@hVa#9>MjGV8>o| zMMRx>2(Rf^)9;R}hH~_GBQ#Pi<1Vd?xeLluo zLZcl4t#f=bb-A8uzFfUi%cSEH?!VN?tV^agB=J*bE$my!(%MUbX8{Crq7>hZalLl# zTlmvy)oZY|`DWa^vwNFmOsWCq{0sbvU;U>yVO z?x9o+2&}0MklXmF66J_Z8d}9fsmJy}-4KpgOWHP~xs7EXqMW_in2KcO%!vAS1~v#R z0ss!2xp^A(Hf@T@n8rVO35pA5b4vWovbB&Q}1I24^;!gUNw} zMm|xwh>Xc;T(KH6Zg|4JN;jVbavUDnqe|s)Zf4mgV6O}oux4dUeSP%{_FItj~m{l!Jx3pzRhdq?836vG$q33t=?t^#wvU=JQJq70r)k}P-<`+`UbzYF90Koq}l zxr&L=UKUUpVAMo1N(?`Eg+7x4eW5$41&azu_Z|{G(e^sh~=>B!?ghnOX7LFIHYP;hC}xgLKl=FW@0b@{$75|O&oZ| z_uqo(Rh9$yQ|I_CF+^0O4#z(mHEi=woUv2R1EYyf3Tv7Ov}j_UFfdQ3UkS+?>Ux3f z08jq4%l4nOO#Dc|A^u2|J&v-UU5_-^hrT6tuR_f|gc&xVf-ByE!;4Vd@{skjeMqwp zvou{>Mhonr2W^fYDwO^_?x~JFH;5)5&e+hnvToJhnXFcpER=g;Nk6z`x z#VTD&KWC+$wl>?*pB|*1wT8c`Q>lnuoqV~WgNRY{8N5?NHlz~upOiPhQ(j4_6{pqo ztMxc-!u1B~K)I$8xt$+gMYFSrJf1#-UTE?qY%g6!+yMFYSLlgHTV8y$h5mX(#W<<{ zUKGW5WLMlaZ|pmd{^xfA(7#?aTr}J00OSq~)V(vH$C~o$Zu^rU3lGg(QBq!WHwNGF z#7rtx0xsy?nBH&2g-edRv2KRWvj4T0Znz9`D|VFVN!e zWHV$9Kw0H1>oNXTr%KE8`AL|(AM!v;E09p9++=&Q)Y>dXfuoyRx65DiSH16tx-MJO zu*FT9Do-zWSV-IVRY1jS>S_mT1p=O?xB-;4RSlqO`-^y;JKO_7z(=$3HIC2~d2Qjv z3C|JE!3Bud7)DDMr)@tqLe^k~nHK`0%Nw9~M**LGy@{Za5WZLC$nM>ehwRnJI*|R4 zCm7<}O5+o~3x4Qb(6=8&t|9T4Cd;ccDxYI-Fxt+NeLV$|*=N=CoikwN@ziV{ZzP)t z+dWRrblkguUd0i1-vRp;NFj^~ECP8xvA_(74RX7^%@C@mN7bD2!P##eV!3G^5qsT2CZ2LMmpSYtYMXdI4~OIVyuNi&7t_`Hti5p! zM}mttqXZvt(=X*4|GzJv_|;Xz9C@0hYAby!A&LQDF6%oj_8_A&7hZxeSH0XgZ=F0k zH~PwqjS*fB`hx>L5%fkxkojmVFu+5q+82b~LOu`GBk%h>v8&85{%KoYxJnS<2Z|g8g6_pZm4NtSXNMQ)! zv-{{@lf+Pgot3Jke%x0-bunxg_+c2SR=N#Z&pfoHdvOyV18qN)XQW7M9j%l<^R zD|!4%7i_;s04r9>cQ+FD=5P1Y2WHd+Jjwb=Vn9;`c)92g`aGM0^#( ze5!V-yY^|wYHrXBh+D-Eocc{+ld{79z17d z5KEVkjR}Rie|-1$0ur?y!H?S!z2cHeweyNgMlK^z2cE}=v%)r{X+1HH=Jz~vAPIoz zmq-5N#+;7Zp=|mGr9L;H6XXyt}1mYo5oa zOf|~s5pmMCm(;|?mYtQlwz{|O-J4X^JW(kesG$z7Jo7;f>s2*|smtzgK=p3#R%0muZ(rPR6GQe9y{{? z>$v{0BLaHp@IRCH`NVJT?Y`|^@wfM4x|iB{l+e_CcQ4}+;Z+`mS7JDfM_A7hqaWUq z;^zq)+Kd~tHNNt%a`0XgHt6L&;))!CcYg##Sclw14%O$7Run})T-FvSLIeR9fyi(6 z!fSp4Jin&!JG@i_i(~MEm_tUM72&lGD^?NK01vO`fOmonA?y>N_HpP`rFlaGlIwl1$b?a80jPc z;LnZR{wHLF_kFJ zLze~UCmx*OJii<^&O@EpVFu*f3p~O#mj&u8kj|t^d74^d^H0}UDc}XHm`d4)s$hpFn^$r!@Ec$MAKh+5@>qWCozi3@n8o{ggn->lP%OW9B`xdd^=n!L=*_JhG9aT$`wr|#9dkw0` z4_~cHR#VZQ`eGd(EfdZ>7V5pkH15#SZU2FUI-TalHU*(B>Ua#v>fhNBe%w@k2zEW{ z4q>epvg9{tQ3EZWM@6PnJF(@4X}|!gk3oUjh0)> zYMQjVIpq{RmCW44>O=XYG@`ls#mCE?jV9tSidi-&RX@KgsB3W#*WPMBzIX?ITZg(G zsYeJ)**wD`s>zIhbnMNP!(LM%0Ppk(K{AdP9bd=kOC7|Ok*O;%VRzWD?VZYfudMlx zMYuJ_=8*HE?P0@7m$PpXYAc`8I*QViYcK1cRCiao?hVHUbJd^RcrP9;r{$}0wtTUd zI_!guUi?+IJ>~cw%Z`!I)unhUuQod_Z2$`Aw%7OKT@6jMw1xK>;n!}&(<{2SHXx?e zsM(TamlF)aOezuufggl-?P;(Zmz1@#6k z3wcw#ZkdYRh%Xn>JWM&jPndfLnQM(XmL}BemG{id4RJ#Xk5ZR_pKV&&=uaL zi@MtznFZagjg3u>E~)*!o~B-=si7s@;e$7oa9n*C)7#q7~XHRQ=dy}iBDctL7ZeK!KnuWT&F3~0wp&n|N$MV5o_?fV!yvqKesv(XK-A&36 zJ+yl*2d*Faq+)dDeXEauLGk5Cw&)%smLDiL|La9nbluE>`l3oN;jQ~WjJ*k1Q)e3f z{T@u@B%Hh?hvWbO;z@u=kc1E*K-6HuqM(2RZm4L00AULzfP%YKTdmsK#V&TkR%hx| zTf0u{)D~?!wVf_@TD$7hw%X~U-JP!KcOS5w&iDPl>-t|;L=2eq&AU9$egEzqWW(}} zr4Z;A@f%VJ8q%r6P9uGXN~2vr!LWK0^MdKn(_fVtKRcEbt+-W|W7nsw%6zD^DJj~$ zH%(DeeHuHmgUhkuDxwK}%;RUr%~5{`@s2Jj=Wv8ZH_>RvSIoKDw)SYvI*3dR z;#aIA*3J9*>U5Ze4CXxv*`Df>s_Y;??in>uG`V^F_uYd}89h6HZLGYXB$jz!XIx3q zP+0QUicDAK7H*{b?F;f7&JHl~o_7pCm#Y0@If{BW-^rrZK=f69Aq4oQG15p7P< zbc*AjZI|Ws(YHAnd>n6c8wPgWC){kbai+N+&o`CJ8u5L<6b)8rOFFWkZA#`>EFO5HK$>04#qw0~^HTSe z$ev7GdiG?R?%7I32R)uG^Mv^W*~Y_ah)t_Jd{#E;^)xxAM|);@ROb6LISHIbsp!#(C^o4d}%lv7H*5(WMt}LdvucBv^JJ*q0 zHdO6_2=#u$jVda?#dO16+fI?ha_{=+lbN=q8rKcc=kKSE$5mR`bbG{7$I;9L>}h+t z;27N807I2lr!ozncK2TD3DhDKp^ti^4*Uu zr@xx`C6xYBd@k4W94b$x)AY}#22vf~Uaq2~-a;=r%A#bUK_PdVx=x&7t)MH{3PUmO z)6qvaEl-|;k`zQeZo|DPtZPcPipTnvL|?l?GFZ+($*=XQB`wUL-r}YGuh6Jf^nqL% zy6)RTFMLS!TRrrYhhCheRjcq23@f020IU z{s3+LCmk2ZyFJDRHLzj+F5^P_?$oqEOAG3LWpJab<};_6ONvQ+c7!{QDl}Q{JIumKZ)bV6mZS`H}~x=K?SDwe41h1cZ(<;ur2NRUKz;WXka4 zM>i9H;x2w~8U4bO1zj{CC0*el`ib^VA9d({rWiu-1zwhTSJTI&ADmdtXYG!Ftb=jpbn0uzr@RU_RChRFK zo9?OB-}ZW8HLjgiCYE_?Ju@n*vVy#p)9JhtC_{BK zFv)x|lZCD{vx~p|`ej)n>|-`DzaR^8BAW<}b&ee^!~sA3;59A0H&EZd_D+ectXa72 z4ZKJj1&CB1C33v@_lx^@v7V~lRzyyQacMjmHoycDA5;D# z=PPjh$gvO;xgO{*8{~JiVJhwThT}|j(cAcsux69(E69H5y|>ubF`kp8J5TZNr{XDsD!RFZ7P2+8Irs?Dc4|FG{!LPEfxRdPD*oJ}6Av*rKKsIyCYTTdaFAB}B=?V}t1Qh~uTrIXrn>7@ z?krPc`B0B)pgGFfrlNImT8TTi=7mUq675eCI_KlV7V^y-NE*4P%!30hzdhw_GH+3t zS_R`2*Ga~xa3wQpQ-7!}z;{d`)0l1+y{NtGQW>A|b~Tjd6(TL3V0rk$H@4deAJLZm z9i$82&5RSB%Bzy(dM0*CMRl#mH6L8OK3DyMJV%aVR%BSkI&<>qhU;ZBO6h~>%ZT!2 z`ZG%ErD?69V7EBEr?;Z+wIPMRzDg$6>hsDJ4;!}~ZBvj!dJl^Vjk=iHD$F}f+u9Uo zY!%qSvd}MIY*Xml6tV&HKQ^mRY6>x_oAtRYt6 z8qhV^Nanc(zG>dq*|N73dVo2CnV%_b`y9eTW#m(B`KMa%6;bAcCOtW5OV%+q7CjJO zFZTSmmOy(^!$XHzU|{mhmrOQKD*e)_pLn(6Ieo;$Kcpmq#@d6vQ_N!0>wR8JpRISi z9GwQL-(?`DpkBElLpby<5qD(iZF!cN{E|sRXR+Zs(>@#D+Ni(JM2c&_G0EGt3##Xo zc&eE_uJ&m6YY$i7rj>gW4)djs*7cy=(>c7V@;x~dZ#=#E zpKIS6@BTY+y*xfG4~3h&T>F}g_=_bbeTj)Q8_rD2(KH-O+I?z#PMhsqvF0cBZ2w?r zF}-fHSY)k7!^Vdm!@r(Np5+IoDby02*K;lG@E(2TN@n=I3$2>X^d|!e@{=+h@dC^+ zn+u2HqrX#RptT{k1Tj$z1oR1;F^8_N*M>ly>guHO_tB-*OXLJAM3!T3Zwr+k64TJ6jAb^A&fyd!juZ7e7x8ly*WDab~68Vak(9xU+oCUQ!(k}$mD>H4de;%}g}w83Mq?X0 z^PKsC%;lbj(SO7#)bPJS3psHMo9AfoD4B(t!iX$oo-D;t8s+^wTC;l2`i3RZHdzYv z-E&cV=ig4!6yMQ!C~M5LWOsn%m+RLz?IO>HE_RXysCL_77y>6WA57jNhYf;Sh`ac7 z5tjL;0kXi6R|JZ0*k*6Soa@icDYWPMeXTzJup-h=Aj>xb@WgyhSLL&Ir<1qEMS94s zfIQ?E6nXxR?^}BazvbxgSq}K6K3Z3o04o%%sX5!8-SVkBiiOgMK3_*O3e^R0Tsi2; z8-y*|$fsogr-ZfV+hE~{yuu8;!hmJu$nNOD9;zrW-$q3}uHN_xGfU!c!bp`{m^;eq z#8WA-MvUVIjN^HV`e#!tZ3a_y=qb$T!b@6>&!GcSd<~ z&-UUw)P;OE*~6o7OeY|#8TMS`^Q<$^$}id!xj>pOkQw>FwT6K`%?02wKOgIm!%pvg zn64O#JQ@{xG>Qf7)&M%PjsCh+i>K+#cSLSEy!MvEGjhPyh2Owh2xOHGUb`JgeL6W=e?kHi4s%H^jIWcKl zoUB59hWhCR!~@T z1L>`P6w2*}$_p~{^EC4~uYWMR@ip{9;VtKhdaiRG?l%LM;e3;)| zLkz>5sfJ7JFut|_W}>M`Z=;LW6S3#hztbxwdCTarj5s=u*R9n5x?sapCc$Nmi!?f4 zXmlonKAB^M$4P%AkGauUN2=)dS2CJ?z}i9gR|%NM!@ch9wJB-LtCctNj#a#(oPRZj zX|0Z99CnHHvfv$g1C!0%$jyc^j2f@hCCXZT=Gi|IQ8A%<8wsIjv?OvRgA10ne5`6# zFn*m|f5w*UYxK9e1a3A>{-S(6L%M4JVsXz;#!iqaj>s8Yw#gZ>2+BAsIGF`bIiqgy zwL@!i!#-eM=p2}M{QM)u&Yu}oT6KhR$1;iKp*Y65J&M0!t0auOw~Az$U#XW>`)Q~V zMOqio1bc`gohfje#y$J-zLzs!YJfiCCezJYwDm3y+CFmhIONW(_>4m6unQgb!Zk0O zBU$kWvf?w*WEyW=&zB20ov$q7D#GGye*JX-HJtiVhMm#_$=X0mKRT2A>+M{(q+crR z(u&@$AcT=u*=Xl4H2W85z(+uIC8PHvR*1Nn;4~5vzW1)3CFotxn5PT;8I|o%`$!J` z{y*rsT&Dd&?ATI2N=t+d9^QZO6aMfcMi3qFv|-vT$d@OvqZoiR2dH4;DC0~Xn>@-m z11XP5OK1*Zard{#r!0`L!;*^v7GmfWP}P$#&-MmpqIwi_CZ8(t`o^dL!UBnP)&kn! zyfG9a-~gZ=m#$pS=G*jQMg;UT^^^F&}_NP3rw5bNmj{Ytg%|~ z3}6LKpM|E+Dga(vjP+>xOaWK`RAtzmoGyuVkdqF6IU#dZFGI%nE=XQo%)6u_|0(GV z%i;kgc55$_1@_C6i9aPz{%6}ajD!}LUX=|joaIF(K|5W7)%lE9B>avUP{0D4vSsQb z3`T{)eHCPn605xk{9uNW<>z^3BEAW>MHzS4O@i zp>IjvDD)VpYT)Eo7CL>iIIx@TkuVtulk$t`p~)|@<(TA{n|!Tt~)ng@yJ)>j%UU zIUpN2AglZq!X){(u&n0d_SSw&)T2x0hI=@$rwb$RFi*V0$Z(!J&*s^kk@IBZdD2ys zn;l#U`CiLqz;tFrN9Vpz(un5>u;vK&YE zBk1hki{gJ?p)(@NkBV|qw4p$F0Ur5? zOBQm;Iy#xiE%Jd|(!FN78FrQ_yit4v;$#_&bMFjP^Q3Ll!mG4u?ik$pYtSqlEGAMsy zP;R5L$dl~QldS8b@x)%#9rzSeO*1a z;BPyY9Dh?wOS3OhHEw^^I50fU7@lE;adDBUil(Uwch$HwywLfWG%y|_*~*RC$`k!~ z&Y`><42+=zGSeaz3x0NFk!oO($}^SDiPN{l$qMsFPV=`)FQ-v9V3fJ$N8^c^{>)-H zeLjBZ^YJq1!yS7J8sMelPxmy_KQsgvq-D$=( zFA9U~@y!uVzLt{*_9Zx-Fec*O-(m*dV(3I6t;5zKB_5#ouwk1drp>iQo@duS&$>@B zERJc`Q<2xXq1QS2pe)D6e7-q1{mo>s?t#CywMI&GYfE&NxA5$i=j4wW*FUKt_U~%6 zg$`e9Zd0TtdSgvA3vs%pKiL6$Rep0zP9!CEASHI!&4yJPX>2&?%}N#(<49HX+N$Wv zRfH5UtEAIyL?_>=lb0ROf1h)<z7`k=*)7kp&QCEDz#$*W)ve9dVKqqlt z6Sy(bigpaah^{4QE6mlLL~TrohalRP$mA)8B@81YihqEv&{-x!nUUWpk~(Y5O8jIL zNe&xZi|Fl#LQXP#OYShAIBQl3(;zPt&KX(Y8W}g;XjEg(YYC?N_%e}x5kqh)P6oH0 zUSVXhzZRaKknkI?G2VJmf;FZWkVVE9NIzd~$_a58e4WmHOlUDNPfs;28KbYrjXxwv z%vG3G(~sW@QqDp>bB`8NBM7hZW+s+jn<_CbSg=m`n~lu!gxOeuamq`-CiyqOu{<%^ z=7#+Lqz5c@gJs229rS|H`cBs%uw0)%rOoRFwB@qfZvuDYGm5~0Dgjbr@#qES7Bbx znnyW8d~VVuf0vWU&&Jk-%j9R@KluAn)!;sht8@d3!k^uMIR|2>LemU|@xGsk?jvZSQ6M9jgF8Rj9E2rVTv zozNpnWT=wRU>GfUxUVI>AGFV;8s@DD5m(yHgRUkCv6H&6O^Y!*a5smtdgku)#Y$D`163@;f;oU}wE-3qz zVB;iF=vuJp4u;$N<&(cG!R{EEN^sery6zMK#w^D>);#2g<*F7am_ZR}y}m%FZ#DKf z1*N0Ki4}80aNztGH95vrM!p#%ae8!CNC_m4#?Y&}AS`BI&~)?(c%%7>w)|zh!<^-MJNH)rP&>UbnE==hACLQ6d%R6}s{qRH-Hn z8+1YeYu;cfe;E>~-7i}ltrl8m0X2g9G>S0uH1Qw(-Xd{rrALrg@3d&H=jU3a5z_II620ZG73SwB@n5m@OBQPH z#=k20r#SOpm9{!1w5v@LWtWLn+@{~D)D(Gs zA-&f7`MrwA-*N66hYrTR_hn+qh?4&%OJ3mKq$CB7O-l7>GLAvTCjXX-BmXLMlTv2# z^K%p(o0K$zQFo_(p*z<^Y_7^?7=Ph|7f5w$st?~Q~- z;a`4Y=wYS4ku{biX3j|-G3=<9!OXpYwHd}`{z<7el~k0QJgLbokSI%NU0Viu;c2Cz zlVxr)Jgqbhuo853hEfH^>iJ_z!y7D9u34>rpOxlckY8m`cs+%oz$d zAXuA-&ce9QLrhd@IH+KzYc?DHssR7gxeoMECjb0u0#9qa5;wKVY*N}lj$J)!X1GrTUj{u-kzS<`T9oyY*t$#N z!-J>?<$WKf8Zh?wM{*c1yvUcW;klSz?ap$t^-$&qF(c{ozUMmPxV-W}n>2~n2GL>?fmoPvNCuar z7!EDBE$kR~EL>R#mBIR_Tl!PcF)!;ZX@Plp^Ujz9)*{qV}L{!GUW@I!*914UZu9 z2{_4+-(53q-j(=>^+ckhhN7dU!CPNBd%6f0cadbShn{GM!n-UM`^~cjjmu(LiFU8z+nXVSHc!-w#JV zmTmZW-;m{@7-*7OL)WD@^l?>ypyhjN1PdXEnSyYh?J~)M~*8WbtLo0x=6ZiEq#%Nm2>-(nhtCF{5qZnf-D5eLJZP~=VfJCm)4PBwT%m#22!pTiDJ!&|@ zbFA|BypQP6_S>MVggK+jz%Pjt@ma*tq8nxeO;X_>k?`Qb3)(dPu;_!|O#;7Rgz zsQycxyqMJRg`n{XPwLPveKVQ3=sKnFotjw+8L~=npThHWoPuKDa%>$y=i0HLHU2hD z=36@VVlmBxXuATy!9*{Oi{&|8W~XR8VBk!$9v{6wC2<)|GU;&=20$QBV_z!GregpB z&r(CKhAb5ZRQMu&M@PThlmtRRu7+FM7hI~Bt}*9o7;i#-&eGham^X`|W=eU-{IkxJ zP8RJ;9r#Aaq@KU1qfK#eqMq_HbI@;kIAKlz6z`g5f%R+s7kUt&gyaIUJotucor88ljCes?w5A~uE-=YyVq{SP);v@ zhcL#O#wdBWbRS7OuaIs-|8;5QeW1*DR79nf<6#W;<8kzV{VWLEcmiJeea>T8?5?BZ zWZocdn{7bvl9CyoZFnlKjYOWp#8)OWj>;%`@x>DiSx5^_GJg;5Vm$t_?=&>O@+lWL z5Z2qD)gWhx_qrT486xR@)k;MlD!&IF1JRzEm!^-fyMbQ%Q>(z%40GMFReWg?Cvun=mrb ztBa%i()E{AZ~u%tF;CzW?~mN1Mf8MEtWvd*z_Us8Jp+@Z-)QkEv}34QdXnYfxkhf`x|{Q{m!yfTFjNj z@H8WRDSYUtYYH93j3IxoRJJxacqYNP4Qc`)D@|3XBLe0?+f=gL9KJG@5jBOTLtMpm zq$_f6rf_a1#79lN+?;t0SI5}S%~afhePz6_4)p=wvO^ap#dI3xJRtYnw(#F) zt4qWgrmXZy6`su4SlhiZ#H7fuO=dKuNL6y)&oE)*D#x!Jwb}!iMO~(Hx?hC0MZ$ag zWOCcUT1iEZ5B7}b))5X(mlbq9<;uDOXhksx2i6fyrep0G(E+}5AxCMv!8AoV!{n6! zDA-2Aq!ED5r>kpyF_g`A|~!`TVpdh98#*5Eu)-BL8#B+*dhdf0z<8$e?gOgRO(2W@!eV z8UJ#OK04m;l|9*n>yXxDQavZ)7l9@Z`&YVy#GJu0S%&v?kdLS8)xZ? zmZ>8LIftXjWb+a0R}hNKGOe*D+M={$pomO{^SnWSjWM6&7@j_R!hr)(GGB|Y5nAA#xq0E5R%qWmJuRMQ90U2?)O908 z&51t>!iXo6wa%^c)S2(j-MMC*h&3}Ge~PSujubb>G)dF4K*T!j~=0}_&0LC{OzSdjb^|mDu`|@gu}mlHphQ`4Xwf6sX{F}v#DFe(FzL7!na-#KM8@(40?Sk6&P zC}{A!i0?@t8nL<@==b`FmN0i%PvGmTux4 zFY>b7Tz+QqSD&$17WwDBwy|r@@a9{TTdSJ3HlhnfuQ3R5ai$k}td*-{_%oUGD2Jcu z-_aza&uDk$7rA(QrQpqryvXB{`692ogE&orxXQ_LdUqBR58<3)`()YNo}4|Nf(6qd zIc`IZc{(m279}@&s17d>mf0~yTH*z?`nllEAEO6yV(&Fw&i*tmd zx;xBIbAaP1mIk)s%BHk9%4sq;u%^EJeV?^{KS7IFC2LO$ADJNED`ZdjE?zf%_)gk1 zLEmU$aomZ7r;S}Q!_#)Q#W{bO0Qlcqu^gg;ZS?OtTHZ+P!bp6iIj*`n&a*wk;mj|} z0U;aP2!0~;{HE~<`#!#|c^bc6UHLFb&U4;sTpBm94J+W5rEz#l6Ox@rr=72iqlaP# z_gG_{I%1x1S;If_P}!%pBWAi)Ii}zDWf56ug0DJ%??f?$!Abj(W=h>#CIo?_*I;C zHfC^ti19%SaVPeSNz%p`V5}Pf#$xR5U1UE3D8RcaB124>raukQCEdNF$wnzDsG4h1 znF7>clD!}_hEO(!Iv2&84QeSRC_pU}oRBVDLFDO-^V9W23%9;M+>YB>#ZC8h_-ULc zB?)C2g^|fc=?B3Sz?viZC!-o=PGmtyWqlBrR)M&5X)RIL2H_RuADFE9PMjI)_k&4> zN1B9%TxGn345dnB)`k3o;w*ooW_5yBik#^iqW_9>B3xx7(M_z~L_A-bzE?J$*Ht#i zT_5Ps(u1G~s>3$@#cdPx+a^Hd{D3*KuVDASf{Yrt&VrJH9Zs`Xxdar#d&1oT;bW&% zTbU01w9f^)`Ly?=5q`#2tI6ME`P$S_H46ldkt>pgA)S{`g?X+C;A}&1e%ZLk{{w^jEJ8 zrtmr)m1WVS5;CB&XoE=ef}0(}05M~px6~rS;oaiz7HV^ZMgWzNaxg`IRHgVO1Et`u zFaiQQG3nf)OHW`?iebK&bE=8yX$cVUdMhkh%ogHHm#I;^@9E2#s5&SM`6%wpA5!f} znTySzhjib zD*z(TAf4o5G1W3RF*{(ChixfL55!LWDH(-xlsGZ6`ZZ+=(~G6Dz>orW+vq>K&VPXS5@_gTY!KV1O(sL{qkRVeIo!kSOT>HX2OGY~)+?iZ0* z6|{&MVOy8YBCg=qJA}oZ=nDw9mE|Fl!QUeeKwu8zZP>$-R?kcRYq$Ug6FsV;Urd5O z#JAF?@S2vSQkp<7mdScr44cbQ$)RtR85)!va1CDna}_7}|CA?VEn}qYe~4`>KPdI zCC{2D8D+ZhRS&5`Y)opBKGs2d9m{ta@ zpJ`EEgxZJsMur%(EU};DQ4gUK+mMJ_M)kZ=rlXLjai}{hD-Te#Mc)V z>l(C7mUrmZkTUrWcF@-@zsEymbvEy+aTwR#=5D};Y?f^1K5Zb>gpeKgT|_n%?AQYt zM;^0BmY*wc$r(=B;BxCqmdbN{&$&-LDE^tL! zpiN3@MTxtwtk5$Fby0c#hPutq8K2~OkVKgNClDORTTgouP+Gna=etFbB1dCG-1n~? z?JOiYjHt#t3;eGj%kJenIfx$<5=Xrk-?>TH`QzQZpX6X22PU^0EQwwMo)gq866l@YMu|hx9$6Cydv7|h5d@?Q#$0svSL6LaP zOyaG43SoZq%Vh7KDNN$PjgwP;^sXqtK5Vutp=^xGt-n+dnd`#k=vqjupaq)%sZ$kogRO&Kt6P3Gq&kb=fAVV*YpCq=WuiH+qWlT7=QnHu_1 zLRNuQg0iBN>U8BMq8}Hb)kCzJubfGL#U0S1xeiYjMA1T)FtLp>8cxMEep1)~5bNYD zs!w2McsSzF&l+IVngxcHi5_|Pd2h82ZqM^ZA|C3hmZCsDDTLaofF@Wah%(lC>pQ&x zSw&cJ3x{57J+I4<-vkZZ{me&j`jw~TXqzE|u@~sC*L$5LN6B9jEYUCgIg$1&Xs_~# zrXrH}=?7VScOktd#ZyKfm={>7dDXlsoxZ{A&tysYAfK+}}~wMf6Ckg@Pp@rKr- zloj5EHE2;i!ZZ0OquSAvOGoa_odf=&{1bq$>gO5^-t^_)HhOF5zv4}~adcLZzA~0B z0Lnmi|!%6Nf{y^u6I! zEJ~=aC^+mgva5FBvw56$D}}x_#^-jvkVu^FRW7%{B-YJ|nftE$ldQkBy3Cvu`$KQQtQD49qe)ZGA^$kG6jFTbtX1_2=_sN;#w&{ zv1t?OxI0Q#=i?iaIkBcMIP?#f<31Vir6I>5DdslZw;@|0;Gr|gFYm#l_JusLh5i1<}_L<-h? zcP5r(x_=pq&L7X#x|Z9W+cN0Jm`rLFPG<)W+&;lJ2z!i_1x_bVN|~(6!Ti!tBx~;5 zIvy20{;ES7$f$fJ7p zKat%se=`^lbOEPE#k?mdET1%%*ex{1|2ML`>^#2*=W8T(YsV71eHwzmZtYlLH*L}3 zbyCbm3lpcGX8Lp@hYg-@EVUchi?FE)+UuBv-{ZQ0xeC(`C5+S=v^A3*&FhBn3zae& zV8L-~{$pM@(~P`s>TjBJIj~!Lj*H#OsQ*t^w+I;v_L(>t36~OO*ifLhy3MsPMFt?| zH9FA2dYDrv`=c)&rcWhvqXhL|bOb@&9Aq}(g>x*il5&}DtJJoLQc^d6PmHV|u*BAc z%R$}3!^@HZO&g8rmOny6*`%qem9FLKi0BPUV*M z^waS%o*MxsASy(1lQw$P!T-9KCZZL_w>Tc<$zf<$b9pru6Y9G64a3b*33 z8m1eMS++IPlx==xso9ea+H?8=5IOU`!|!3ILV{W>~OnD$>K!7-{>^dsf8Q!^>ft5KRX|FTuztX@fVUxUk>mu z1mu_FrJGqg2U#PV4`I*uNi0+2+8&FFV&-t(|0`blV|FZFIw89Q#Ss(or5$BUsntPM zRs5)&)qodhXsnvYifUnNb;>TQYVkl@+b8Nm7{*E;+}A!U~mPf81-g0Z>4 zXS8vmJl@epbSWQnjHOB^o@6~;19VwCvVFV8f_UL3mtv)PwtMdG(O7AA0$%VJmm!2 zxfe$M5Fm~It?|&4gioh}HJ=5Yo9@-+{0R-J5;bY38Y?1CFXu?hJZ&^ysp}?HiVMm9 zPRRPC{fGf%b_A&)Pg{!hZKG#6##41ITiWketcB{>r$`jht4Wa(rPWfRbXUsY9}=aj z{*Wlm&a%~Qs8oGYh&E1Ju*KJhy;UbRB&rm|pJE&#WB2!H>6BGcv~=@-qop0|Mnj*0 zI4?_C!OiY(_tAO_213W!K(NhYkR2SR_KEIIarl&d7CQCgxZgsgjq9yX*#9R++Otn* zbi6?vh>?B|<&r+lO2lJz^7ka-77@oS4Xxh;q>WC^hvFX-r17qehP;j?NZ-<@9!-#@ zbL$kpM@UP#;mj1(+Z^fsDG)%KEWJ zEzY)=q0x*Wb5zu!1*H?(lLvxfZ2WqI8=FbPc1{YIHl9=UCd?d-n9ib8k{H49B2h@W z)iO^HT|{+l+-S!1_{9HaOry2d6gU_9IM7$ zgUQ0+c!9Xn$i{z?h9fd5V0y!UtWR=Ph*F~yI$6|eRf!HH=>C>6ec$DjX{O_UL#91S z?P$n!57Y7Alxfxz=<<0~Z;(36qsiD_`Fndu$Mf>b<9RG?IunhV6(DF>bx8=%&>7Do z&mE5~C4?yB?rud;C58Md3?FoXT*cKR_M9I{mg^9hJeqLr?%*!xN>h0fqJo2CQNbAG z`>8evtp$3V&rW@NJq*XdqR>|-{~=delnx|aQm!=pDTgVqdRRSM3YHcsQyn-%Tuzo| zkt}WVtI6z&9sd{6(uOf2pb3%GM!RdJBT3f6HGN=LG;Y^&?msr8Z4>0+OWzc$@wZ zEiLm5(DSUOn?<-ZeNZDG3zt6B0*VxJvJoIa`&d<*Cib-}SQ=$#`@=)G+=SN}iPH5_ zqBKU)ghNM&ZF=10P-)7tSX*fK0$I49E}EwL-&E-*`$zUtyHasEQQEDNdCG17QUCu1 zN?&9Bfvxwq(9#0Bs!8#CptP(C@q~%GE^N5z_mVyGzk$;HW?t2S2y+P1`FK5EqV}z5iQ?^yhK^4Usn8Y0)g_qyTC9l#T@SuUQc3 zF`6Nbl76V;_OH}4vDxbT}Ye_u*4M}V|!$r{s9DI7vrD7;fc*RJP{6RL+M zWFD77ZV|$mTVzLaba!m!v5ceJugrqR-EV7wCI%e~{)G(C=3m4Y+q8=PS7uLh7G}5f zf_GR@*j(T*EOZv|r*idwn}|ZC{vY_Q%(cMg_6S>y{5=zdU3q*UL)9ne=mS|<^>7%r zNO+Mv(aJ=#g*K7)t*3=Ud|k2r8ZR|lNJyVPB%2+gNgDdTF&SD@*d^P;kgJSD(L?U! zx;{ijqD>&LL&;5E&sxMq^xOGrNYa+LG`Ym;b`Z82klu?Pwglz#j&yp3SGHQ@r=*IZ zL6?ZOu?U%7hTkPt`(YPG%sXQwKKkc$)OHo~WiFBaeWF~;&o7qW$%(7Q>9lY%W;cUE ziluvhfx0M9O8fhU-nCw4ot^%O793{q$>bgS$dm;52rvjmQ`LRUMXn^+11Jql!@~WW zld-F*=xMfz@(R@0`>YkEaBm`d*`7}ytsqX`hvJLtl!acA<9*WtG=}*tiv#(22lwB~ zB#1E6<>Xr;!_4oO@?nN7O8<<$*hQ95^ET2~dx+r{_bl#VGSs+vmne`$?)r(OW$q88 zxoo4)DqQ8EA5Ape;DN4=l*+`pv@?UWl+{BHz+1E{&^!~n`DsZq#A9?6qash=%b+WZ zNl=j|_kp)OND==wt?ULEYxTcmP**WMx0PlbmapNjvS^{_N3V`Qe-&o~#hxA)=m%^4 zNt`vvjcQ&!^D z!jTLXpd7=r9;=)FX_9e{JV0&8okQIm@#5|4Xp)7l&#JCw5+ItzEFuca7v=Oqwdrvq z3))yun)o-TksuvEm+qe=e+Oql{o(1xskHTKd9SU9!Erlvy}XtT$j=y;sm`!y%}~{5 z=22lVGk+C39}|mm30A~ad2DHk%@UMb6h8hrFV3y-WQo*{#_%@MHk@wkBNeofHR-LS zME~mwp~1{_9=sak2EbQ!;jODEDJ`-L2T0D2SHO(vzB+xB@VAe^*FfQ+EjfleA(L?Z}o-7iMJ0#P&d|e$i5AkxYi7MLtAe z;3u&l_0U&iWbFZA`CMcx(x2AQBl>PL>HThhdM>YFWx+0Hp}x6N__v21%_bp@m4DOb znzVz%X}p(iUtDpob3c(G9Qcru^iZ!CwQ-L#o}th0AT@pq(?{xP(R5ZKBW=iLv&6E@ zH!B5ewp1;gm_RpJr#g&{H?~;*yxC~=-@c9Bbro~+-u?EubrT_jhqVQ3Y*$XLZ(sM* zX_)kSng^k~?xU|*aT+~7Pd7ngWCw1ys8O|O>!)|wF-N2;Dm8vjEA{ljDbCe9GQC!&%U^6Z6?R}LYkIp$d_D<0YRgK+mGBGMEE|6OtUM5|_rV6kdO%s;dfXTy#d*UMQ% z-1bV=(7dU`DQsQ4pT31FgnRL2Xzo-}V{l!sz`H?TkgiR%X)z;1hs9remAS}@bz{*~ z(qszX#3x(5wNsgE4&HY$EymMLU$9l3Vq$Wh)+7n1CIR0iyaaQAOIT_X?z7RN7^KD@ zwB>iICHe`hi7FP3Df*Y7lugZVoXY>WjI|<}{6LYy#q^y`F(o`9?F$z%G7`2c?Nw%?tgpJ>*m!cH5 zbYu$5_kG0`rBickKdYdgg86?cQy|YG=I-5ODt~^ez8t7e;VYK80E=On{J5T{g0sLF zeQyK0*yXgO%Jrp+gMLXgf2rCsrEuQ8G_yCBx_77LA#}A6k*kN}E-sJNFONlo|C8d# z9ksjfs6}ad+iVu*1=y)qt0sd8pR#7=SWeqPcL{$W-A9C*`Wqw4GK!>*mna1tfS@P6qi8YuG@z zoBvWlqqYk1g=r|1BNwSV7!Fc*4L-48aC|ruGMKY_MF|?;4Nq7fYHMhgp;5=b!k(>R zQ7%0faKJy~<6o>a$ITXwu+XNQ3{wj$gIwX9lGntXUC%+M9OO%O)B9>M)j>lwJ>8og z&<_{NdVLnO%K4gYP~ya*{wer~3qXvn90Li(7a%;5|Ka}M~?%nl(mYhj(JEoOTMeRiv3uRV?9)Hu7IO@pyd44InsiY0Yy*tT83mz0o1YrzLIB`5wxO}fJiz}v$G8}Ctkrb(X3*YzUDgeK znvaaD84vO`t%m93uXa1}}%{K#}n?4%(BD zF`Ivuh-lj57y9PX8yIqkgyiQsF%4WK|hO#jv=;@=AsC*XF zkMS6!?Ftgszf^cj1(hQhk`^|RL1vccMySmNr7yEBvA5|AOX$$@HAn zBK^XLD*J2|8xmU$SKg{GP^mFau}C$bs}IcI_Dv9XsGMb+W!*mdWD(GHO#bG_<@@{( z!EzMxWaaNGYf<}G;=D>9ik0FG0-7iPGpLpSKvvKz_uz}2GhZrWcC$Ial}0maV>}!7 zb$iPAdcQm%6~!S7=jYUKXvK$NO&tkZ01LGSC|8arF=H-j&Z4<{o>~fH5SQy6hh_Ip z;<|!pp-Wlm){~ty3c3I^;fDc45}!duHet5M;@Jx%lSuV zH{CLul%0H=o?IYItJdFCruqdd_5NnQQdvGw9l34x*yV%UW_$R^-8E0#UDIOoA-c{3g>5+^43zWqnx?Y>q17!)$%FZ>T%@GF zQKG-TT|b4I?CR%=HH}xz=F5u{!=%6zpv=?9+BN2@W@~e$Xm5w%#$qD2wrl9;TGKu5 zed99!?pCOwEut%9MlF}Lf12>3o6m{m-~RL28xFdd7~@HrJ(d2f5k9(?5e&yT(@D4S z9J=D!1oxvG)GUZxHT$}&W&>$NQ`PiMp3BQGu<}XkNXQMl%q~-2mEoD7VSpF@qVD)k zz3uK|$X&m$;XS-?7aod!`BvytKT``?yy?nf(-AdYhFn)(W(#_Mxp(MsKJ(@p`JnOZ zp(l7}E^%GW1GxkNwHhV%8z?>>c@ z-}m>w{?`?Yq#?`6S)TWK?)!5~SkoQm33ZvW!MsQfm&Zn`t-ddA9q%yN!dxxBfe8A;RyA*i_r!bQqTB`07GQ*fhC${`8>nx5rYX3u>@KqLC1$siwIZl&&K7#v9Pgq! zc?Hs%FV2T*a2Tq=Mp&hWSXadaJYj6kEPBZs3~hOKY2XkIrh}Tj?|%CALOTAKeBHqG zQ;%inAIs1|Bw@pU&8AtC1GZS&W}>PauGz)21bVSdrl zsz&2GD(+S>hnWW6P!<=tJbMxGZJ5t^>4W_Mt=L{p5jM-) zH_KrwHvUibN2jysW;wq0V*@njUQ&GR>8zm6+a^DXJ3XNSx>#OU$fO15P_14wbKvex zbL7a@d?JN0AHMHQ+iPH%&0scFH(qdJow7mH{5^L0nMzWrzv!Npr5BJ&?hP~J94b#? zR)Q#utkK0^+mmiwztPc;GZV#(dWDr3y z3u%{LiEL!MDx|hYdb;f(b+y8r#k7`fyI@I=e!(S3owL;1j!A`CDYjWD-e#W6FW7e} zy(dTiNqX7A=>@khvuS0UGW7d?VM-Ff4FiGZDE6$o=e$#i>8~ZktdGX4${Vl3C4v6= zH3a-z%o$>xeMO;>Xk7nL9s5;=@j7JGbDdgL9TDykEvk#+dL{g$Z8X@RsKVL1j~+-R zC6n8=5MJYwNvHhN;WSWd=Do$h_{QG*km9ALfz>|aN*yZxBW4B%lKH`4R7W$N&C>5~hH zYjOd1{#Cfw%%}gHD*W;Ow|~4}wu@|epD%l7df8VzY(u**+Z9V!0#N0$?4SJi_6=`+ zk6Fs2`{)Ig`UB}Qn-ZZOe!731)a4b*uG&?0l?`s7x#{=oM;UOL3PP*R@6DP zx~zh$Rtqtb8S?{5gSr`>eOO(Kyk1%x+Ol8$?H9xkucaMo_Bf_0VEx;KZ;o#G=4hU` z7b45V44ojQrn81Ha~a#QsHr?{c)y-SNn>Ky#b+?N!Bt_r+0ku z#ASINSDw$29{`cZA>_Q0%z345hZ2lxzpz8HX@|snf*6YNTNrh)ucQeFnf8NBZGy;w zTolGX8XEto&ZG1qp+LCr{jvMr_kIehe-L;=SJBCrgr!XTQYNBRgG}l1z-%u>$%ZKY zslzWMrVk~i&z-OII`aI%TcF}DvD24Qg=3`s7%@Ff|9KoxDxXi4kV9<54yb`YM7A?= zY;RY5D|NU9rzGi=7{K_m{A$EJ7f z;!;e13$%xBz~7xhJCkh3@qx5xd^SYQ7O}_olbB#i4VzNI!6Cl!ajDNCyer%Hu1xCp zGgB{mQ)=?2RD;9ewc~s5r++@8hOWcq64K?#bUC`Z^(w;kg2-B;x6V+-zdC8+TS7UtS>jO%EMN1 z6zAhB@jAFaWrOe4K13A9)n4G^y~3|4+kQ>ax*)ptI)&d-lE0s5w+fVeyk_Z-0} z88S+|2h!;lmr#hfWEEhIF8gq6PaH@uA@urK=ym$_AzdthuN;Tl$$i3QlJ?6axy62a zvBv=vSh4WhwB&2kf>}CH5}iJGNVw~7V|V>6xRNZs;4D!uO`Tv8sm2Eow3A+<+XJ1x0( z8aH`N>jH%;;1r%^lFu^F>|~B>9X9L>ej$$=%i|&x2MW-jI0%x*h$1TW`t>>Y3&*F8 z9iOK3`h0Hg*&Bo|(%wZ}H^`vpo28K>`Abl-V=C6NK{)6G2+2UwB)Yd|OGkz6Z<$&}v0EEfobkPQRd+jOitC=|ddfiXgn!PK>|l zq5;}8TMDG+zcPitrVRZx1*ma9^F1npfE#WLwqsbhSlWKERO)fk>vrhe0E>E^c6!IX z;FLOrdnCzwBs(sW;#~!P7z_Nc`6Z<{0LH7uEG%H!7ckfj-1M@)2^rIp8PjMDa_Nha z6on7s$)m5Tl-RR;g3*vP8hD6i?friAz=hM(#!gSOEKsAo#rbNEY-oF;r4+I=5sU_5bP0|UAaD|J<^M% zhOb!YBtxCV?+VyO@?A%_Xrn213Oyv*LvnF3!FdAt(r)2=CV4)SzXvutfYIJ2$+yXJ z@C9+Iz{L{*VR?R!c-9B+)sGE6w1ANv)8Sko$~y6iL)ZsDCAk5oc-;`;yJyeO^Uj?b zMYt_ajZVKy=p#dYWC5NTBsH9YFc>JFV$wQ<6J+QFL4wJDFNGll%-9@Wm&f4|PLd5L z$%^}=K5y_o964XGAqE(Eyl`K`1ygcZCft#%+Y z2b@Y_zGP^=g!u>a71Z9i)B>jG!8_X2N9JCn2BQs^!g9$_xnyHP>jsI?835!p`G9y% z34oebEZ8MUy99$U6j1>Cl`&h&7#wB}kQ{KbPQF7rMLsQHjjke`!bK<_=(X5pxF=8~ zBvQ5|Qjp#5^|*b4S&}qM-ns#KW*|5E{F67xf%wKo+k}f+e0L>}=fwSie7|Lx5R#9D zssHt|Kdo~zf z1Pmtbx{xvFVN4AX@)Unlmfv7L2V8^DA@sj+YW#n2YVOe0ILC{4s<9~3Pygrfa6>?eb=HcPWeCbKs#$2h`lRGIuxer0QztC%I| zrrss(i^>)xD~a<)HmqW9)NI6HEYTa3=fX5#J_lLrWBwPOXBJQ=XkiWZX|5BIYn~tf z%Uj#R=&Z+7vL$4#2#je8#!Virc`9ATbgAW_m@Z@K)*M%h*)na+XH(fSwiG6Fb^{JX zoT>32kh#8M{J`1GEc;P(r@5PwqB5-zO@%dZH!&9&&eGcILyBt@fG?cmwOJkr75q?P zuIU93W-x)Q0lahM>UKxlcyS&X;TuqZMqA+qK)-i!KfYN^{4Rhw0Sp4sM+hWMCqESc ztvF9*zX;BzL~n=amhR<)tzg0QK$_UjH&K&fJhc=d`|Pc>La!`o%zN2hwUx~LqkJlpO}${spij&(=w}pV{`EmP76*Z7i}nW5nNfJKWxzRN z+T@FB8f5yyQ8Zey6IKAj7T^3z9tdr%u_*SxzFt$U>>c3M_zc@01za7U*UmTu3yyI?}*3h&D-b8SJLM3$vLwAcrw>|$Qx=nn6y=kh! z$o*O21gBhKa$4qzNM+L}TAY#rm1Dcp8RN3|0l&Nj-aE&&7XVGazq2K31mw+BN={&Evp()NV}IAzTx13Zv!g=yM~ zK1jw|nUS*y<%`Yqmo#alJGjz%gMztY_hEK5GS%8z>FNv}z8P^1$R10>a$R$yv~uz9 z_s;M`y4oA*>WPK0NOT9hf)4IJT8ioxFN>5rp!QMkIE7kuvR19?6WIn{VLp?b&mhGI zQ2rhiH4e&U%0bPp`bBtxX@7zd7gjjY0UhlucIMHYx_*Z<-|a4OmI7>45{kAJg?eM| zGQ1gZP0w=)kr`u=8HFfUdLamjMG$T(Y|2V*$^ym6WuNlCLg;$0#vr zdml5;RcsF-Q5-E2;9L-1+tgiT0Edo)B*%#7;<EtfYM8l;$M$BP3Aia1h zA=@yPZBXOu>1qw$M7%;q>R3jq>H`E30tAfDyh^%hl{8ljA8-@}uzWnRoEgG9zfrg0 zM%~HRX@|bh?RMlj9r=hda5%649~&ZC(6hT*!=@E#v8cxykV&>Mp;2YjiGsB=E!mky zA2YhUY#3Gbb%8GpRJTjeDw0}-SfRGIwN2b9NAW8trzMr*+SqUJF+GCLjT5ACH&pC? zx65aFK^RWiGMqvpHFmn0($CdEYRBS4>&6GRcb)u(@9VY!0q#NtKR{vwh>5m6BGX|Z zhCR@?ts#xxtED|!(pRv7^*%mRr0o{ivh*swXg7_Fig;ZvLtlvv1IX^GZa7qX;~+=b zocba)D>DED<>>1L$OKMZF_>U9yT*aGWltPKVW@*JU<#}CM>Oo-tb&pZ_OUGHa=~Xf zd+YTX$OWd=p)7Vg%e|0cg^Jo&Vl(KEDV5k<9@a+p*$i+KRHKXisyeg5dS0gfN;V6M zDUGpuCR0lfWrsS-uP0f>t~ILQMq}R$+pY}3YA{<3u0QF)o+?_B2=$s)7&qv33-Et{ zOJ(c5tFvT0<1AaN*VT&vmEj0H*`(L%*3TOSEGpV7Y67Q7zJ1;FdF^u1MhMbg-~6#n>U;I|T=92Jmi0aUQx# zISrCo#quRqG&W7a$)T^^k<+CT_;mN0z%GsIk#!}jpX zMi2^i7wkzBXYTYqULJ{^m#ts&#Y2CSkVb0DQNqVu=W3S9;eZ=oCQ2k9CycrNaUy&o zS@OxmduH&^+9tln2eG|pEZdt>@}4ml6_%(kSjU-!_l&q5d(Wr?P+7EyvoDx+=cW_f z{WF(}YiI17JgmnH7B%Jb<&Blqd_B#`1q?6AE;ewonPuyZQxbvGbN++2eermq(Jj{XD2(x=>K^AryfkXR?E}T33tdTs+F)9>6w43v{eK z1WarU!_~Bz^lcO4zX(OmAY4u&gdmLi73`kAFt|}evX@FlJ}ne#%bMml+3SN%LCpqf zk>4U$Rzb>_eN|p8($k0M7tQU=f_WIcpcVq&`Yatc9?514q0s}g9$se{u9-qPicHfq zHP*HmcV_20MA;6U_ORT9V%^MbTGaTk6WeYj{MnW~0KT#-^SUv8g}f*s(u<#(qNC`y z^JvYWjPB^1V*AE%_)l5XF}DT;LZ%(^5ln*zWmZ&aq1ZVcL|g_7No@loild$qk(JCl z^fb?-))Vbpk$6=lvl~jXHM5mGu(R|xUX7!(JHn+|)OZV#nCmu%2JruoUq!q&0sv%9 zFJ2xMBHU`wLl2u?c3{ELD-N}%Sf4=&I0&K?LbgX8jg3o%RNwpYw1WuDp);M54~pDKN0edl}l)XYJy`73(S;fqdpKawos0{x1Y$ z>#Ap&KdGB*%lV$o)0BBwn^zq(V7(IRw!w*gOkW5){W3rgfB=9b@03|L1%`w`?OUY* z2!u!nNY&i)c_g1!q?ugcSg^mDmG~41#*JaR>2htT10M7a<`gqx8n?29#@sj-{&_c7PWF7^JK z30{S5-_EvpA>BMAaUdKn#t#nni5X9QH3@U<#`p&mU|eB%_^Jx0Q4U0F%fn{t|EuD$r{G* z$iw=h9hWeyX~L7armLiG3-goWC0U+sO(fEfWpC(9IbYF?c!y$(D&KS=(?r@V8EPDZ zVAPhVQCN46Mm|MeSbSoVv1}e^<}sA2%&u%H;G)ef!F=Wt9oE8lfsm#^Ssn`|5vD2a zFecWjN9r0w`iU zTiw?Rx~(T42l+Xf9}hwE#M;TXS6(B{hh;Dv_vj_Ijs`xEX9+?)}vV=;PHDxwTRD9!9hZEq4pAJt%L^0fCIG4|B zTGm+I7_7{%IzrkfFL2evG6FNJoOFkt(=JX{cPBout>J7yCvch^j7`)uUK)v?w4;Sb8q($kp2fHk$N$n6TSU(du2iG@CguDL>c zbK~qnC<$FlovtbVHRW@;53NGu>@AJ6n@7oK__>&1za`PK#@Y1W?8FK0CTsAt?Vn8J zpRLA#{XbwrHY;D&Kv!7x_hmq1v56L^s*5nKa4Oge8zPx{8H; z)?NFowo1v1x7gmXWKK<$|15&4L7Y9Rr(HhE34Nwg7jSj8p*H9CrV8&f;rE$kPa$c;?>$i{ zPW;xIlWWZebk75RJc`~vN2p?ks+g)OZmk@%NE=%^8y|S+V`8eau9bsVyI3x^{NIvo zdP_#)q3ixJOXtA$gyCE`ZcQGy=3;2zw#VCT?qaq4l(Wv>jw)yA>M9#*kR3XK_+EOur;N@5ftJpy z%Lb4F(ee7D>U+{>nYJ3lD2|@C{wRAa*Swu-?Fa($+Eq)eXVm)6bkyp->9y)vwL3|c z+xP)x=h0wQy=E5Gs>{Di7p$|It+SNnXeO}N8jUg|sirTN%3+hr%<|S}D{@SCl)PZ1 zDcLuFVpdk6OV5;R;(Ct`g&^EZusY@dTRsSO6)N^9>*X~=x~4UuHHqFrI{!TI3P88w zpC2O3Dy29<;jXB!bA$9H+&^!_{qsQ0$1)HNc&E>^c@eWd_z;)@9rVqaxwVl{Z&5{e zD1n}@_ctQTt^ZYIC3r&?TQwYrYNmb3@$g5v>M7}J-tT#3Ef}&k9q}fJv^Md33<0)r%f*_L3z~6`4WXHph=+>mq09v0M>7lT zgYtax%N0b86;upcCReisek5c~^w0?%DWZR^RCk21B*?OQx032Y(zuNT4ZHtB;?~9I zkw7rBJ_2wX2@D@je^Uo{`oC4sV3$!HzDR)3o%=&d7NfFnD@DhX#_H9viCRrL^aq8<(!19 zTfz8)VLWSx$d(9V9@$nh!Qef9Ml{7QDI^nVxDM2cug3eO_PvncHJCh zOJ{U6f0u19gI{zIrue1TiF8GRo&qniBP2N5v{&q8eXxDNLxCOyhP-S6Qh=G+65pT$2+TEBiMahy+jds=^ap2 z`{+e^$`y%`=*dm4Kg8EvBff5wkPBoV;GdTrmMB>9>F2x@BJkGX>E|cgkPhsPJ(9gG&@Y@FADkq}CN50z7`6j{P_A+)iK z33^tAN?f*yqz8J9Wf7J+m@4pVP)P%*0j^U=Z-{NMGB#!XqDEfv7dPKOipm5Tpgriz z@|4XcAr2SIV*0cRpf2F7;-V<%ha3k=%68OkD7t~Hr*q~r2aJq-!gP8UuH5W}*|elc zJ7wiBR9J2AtIlylzco%=!*y0Kz<%b#S=5c-&?wMVe0>7s{%X<}YGRG)&paa96JArf zL*ck+Pq2j&c2rOFld311%9su*b(L8ZO*7Z~Hcg=OCa$Osy2%~(Q&Y4A8nbR#`jr2uY+2KXp!%G*nM(j~w zCG5q1^wSU%XhV~>9+BVg^X#>}{?I~M>@0yms_-0Q?ymCA!2#lGAjTOv*c%GU)NJ#7c1h|KTh-tx6xph0 z5$526)-|ZjVme0f3*amw+=2$Jc`Ap59)}r+D{BBG^t##d$l@@)v;g0i>op^TC}E^2 z1WQP){)Q?=snooV@s$iznd~A|HK!r(LbKC_L>+%I@LkvAKC4^J%+|N?#H{+5`-_Kr zY>8R$5OdKg`ka}sK!*{REk3Ac7X~YMT(Yn##IlQ5Ai#b&9Ak2V$>KEP>G;mMj>y zVBJe!D58~hG(Vq2+ZPdOv8_m9*<*@8#@=7hRcF4{!lDj(e75zgd~iB$XlTC(C8O%X zKr4I{{So%XIk~XsbtBB9yKj&?Qz-BEx-0|IMHQy)a%|4=`>eOtZT&Y18Wr~Oy`;nS z%fir?wQF;k8Q7qOixRh=zMcI|i*t&48HqET5`A(IJLDiV{r3l5oG*{UY9iiY?3XK7 zVT7qrDbYq*r5Eqx&OK7Ts(h8XUk=?LPF?@K_uTI%HF%gB`1@!to@fdSHT5zD{c@?@ zY5h5!2^dv%dmbk}WgF^!Wq-BX!2_tOcd@X01-K zg&xiXALtDBa^^DQOO`Bl*@~=;5;X_Z1?#qE@#ArGxfOre`(Bpm!LKlw+P#f_z37kp zTu}y&A&JA+GT{S4cWr$whE`JuU3$$7?zl$riFsvxHGHB@SIfJ5rO`g)-54tCFJE$E z$#^Ftr~h7Jwx!B#p_r+I%WY5=7$*g?KM!?MeAyM+~LVk&in9X;sSaCp$s}at2 z5OwzTCIXszZ-Z3vmgj5PO$c44OQ7guZFx#>iOXxIOKL}@ih0>zDJn{^tng%Fdnzn` z}tI(uO!hM z8p;ovWI9`kO391UlAJP-{k=YWk7Ibi;f%4a8TX{mbRdN-l8m<5THDYuXNMJqgXjhV z9odVkL>v8Kt{h_Y@doCzP$B*9W#R@@^szdJwhMOMBq~k{nV#h7uNpDmD#o5p(_wrfrUZlz(if_5z7XQ(v$r4P=KmWE5Spyo=xR?1 z!oaF1cqGJLTGaDv?nqw1hZQ1%vN@yto_Gp9sTPx5=mY8UvzY**LF@gMhM|bPKN__+ zG2P=~L{7`1>?Zk58_#9})ZS!fy;htb7mYt$CWZqnx$WfTq5A(74bbyC+HW7 zZ_16vlFw2~W~pSej{K5Bg09?odBFOv0liI3ms4bUzfZ}`G5y;`^*E}p%ii^#S#crV zAvamHRhTn$MwpzdmN5+t7WIw%k}H0Xx?4) zJ8fg`s>zmugj|1Gc(=v3(ccU!hVxR9>ZOzLnHBr>M$Z3sO|t z$*+0@_yk6o--8!2>VYdy>kn7dI#sI0u~pMmx!luA>nAHWMmN-IYTjl3OrHvIU->k< z>a@&b`q?U_7z+jwQ$fMo9of$6E|vAl86eSuT*53=JfV-PG(oy8efJN>=R1_OHBA)- z$7`5?jCZN0o5J{UBP4e&d&C)XdE9|wtVdjJ=&E1<8*%|l=kod;;c%qg@8%@&_MGWO zA@=@^{0k;cp<$&BvFMTY7$|@o7DdI)T~kNbDA=ax9oSHIEb;@AffqA#6cy87CEs)vq~4+}lVrHgB#|JeU`)pwD-`ummK z?`DlV{H97;+qe7Py&YQQQFnC2=|YQaM#+aVS@wokR1X~^-ZojD8^JjqpToOuljc;L z?7=qDDtw_T`$DB_IGd5h=IIRoiRq$a94D{f-Ylh8mtiPCf8eCBA6e$+LP^w%$prL3 z#=CX&^L~9#CHpUw6d1mpDb0Vou^^~~i?aND^%MjdcoV(35|LD6aoHyz( zoo;z}yi4E8S>AAMovv?JHH)VYeM&0(t_o1hlO>b4Z5=~%Z-AL8hmPT%XsehL?Gc-Y z2#GEZfCMHyQF`+erLHWA@PUfr7D`2iM6?xwN&!*M5AT3eSU-?l4?BZ{7PR=0Dd&Y~ zw6(7T(|6Y@;qRX0-#u5L!IK9yRx}^&JtQNE`OY|dL8h8t>@EqE2HZf!=Z2Mok&w@t z8zzZyxuv1LKmEn4zKt>5c$y)A#1%KpQu@*JlrCq{2qxCCAyPC3ai%xm7-@~-3Rb*I zSnVEK?XGFjqDzQE9RvJrT_`DhPHugN@Dvk%icuWZVleIXdWHRxq5YBq-AG}4us;&B zLBoJ_UE$O7hdw>uT@0;~J=~7jd%y=Rf0}nF&5O#!YkU!)U8JVNkKh4pt+*I(1Ig6U zwWt$58A1+aah~5twLu=d1fWz$7Ir{WK!Z*{9UM%CF^9hVMdPU zBg2s81EcmR@2ie11JkkOB<>hU8{-pYLz};iI$pOS^p`=G|-1A&ih+e z+x~*Hxv@=cR;G$)^Ais!KF`u=iiLN$Q}1v*4n}m&*6`)7k2x)fOXx9qF=3sqVaz9H zefbfmt?vtU@|CV(lwb&s#R3yHpHGXkd9dHJt&JNH#?8gvWV~B6(AevTv>umh-F*sS ziw3v)Et=9s(TIzw9L7W8M64~s=I0|16SNFEycFuQ;wAEBO+3{tW&f7XY8D&7uZJKo z9;Js^8q->pc@Zt6J+&%FZ_1&ud^)*!hqs_4DXomq$+hfF`KIS5{8+q`;2s&4pwa4oGuv zF1Lvb{Zn`@!9JJx438^r+(mx8QZsezdau-Q11h>+Q&n$Wox`0TWPfLI(f`9(t)u6s zBPpnaM&{GggN9BdhoT55H+(}Rj#PhBaech1+SYp0VvaB0m>@AaLbpZ@+Wp8}7;p?h6ii@5)g_ z_Z$kbKX*!fHo9?$6mPukrTubHpEul*ocp z-E?>vQ)Tq&F+xX6GKTJAwm}SSG$ZCQN#wy;0y{_{s>|MvzHS)o(Wn%EpJo09ziq-E z^HDQt^ggCgV$Rb;)7o5+D~uCWZsQn9KCEbeR#NhSA{S!;x5w#Q$LLNypwK@6;JX9s zRd$Z8`r{(%vfG~4o!C?GFi$R=IHUL2;XZ6U$g@}I4LE!{N88|Y94Kb$xF;NWd^PlZ z*o`nvrr!nq81P5=MaT_~blXs}vQMv8B9gX~Hm;^SR%$W7LQ9FeBA`U?DMn{+eDSif zFBPsk-2(^BGS~Po))}eeD>O6Dr2yRKLVGLo7WT0fN+=ieR1f6BFP<-g?)gFb+42+X z*`q6z;RuG-ATYNvHE@(aZOp#AdavW)Gy&C8-9?py4h@ z891ZO&?_3Vv3iEp9k@?`L79;3l6mdW$F@Np+m0C|=B;SDjd(mZ~S!$-Ji7{nTO*?CaR;i-B#M6G}UFlliS+uTBDwyA|4$&n_yxpJdg z3^VRfxJ^5m99px=^dz*QO;q7kbs|EC>Ne2@YfS%?_O{YXTD5Hu2=+1N{ z(C!HHyz5-pCu0_agsb~vnH$aUVcr%_Ty09%+@@c*(m3}bZlaA^uCS=+w?PT9rS5l2Gib*->F%6_!G4x+_}N^G&;Lxs5H;w=|}^3TV)KVQr~pR>!2tta@Y=Yb<)i2cjg{&GwW;$I#XWG<}-j2aeS-EEE=GjQOaXK=e93ZM1RAI7jFrtVe>flaP z377{NSF+Eo)!P3kv zZ;u$Y_$bg7Gnc{*s(O@w?L>xm?~r4ovRnNwo=MDfy-n&TiSdA6$2 zUZ(kB(??1RUv0AmWP4TG$Vg{29F45ep|*~r_eMWmjj=p?V!(7jvB!W|5A3JiQS+IO z@wc%nvyxW2Kfrte;&I?I#;xo`=XY`?>89Uq#aDZ6tD>6ya|jF#`mPzWh|Um~8wsXc z?$x^^sI%gOb3ZlZ5}R$7g9&B2xu4p`XJYdW(#zZBp|&AvtfBGC0VDZnHN7*J{ibbW zc(frnjV=!{`$sR8M#J=2Yaw)KYs3I~8^1U~A84;Rh7L&?H%$tg?WXqKr=WxG?+0mO zz7k5K&hks7vwxCD<3UUNsF#J0k!^GhE5x24T=t7)ZinZh+J@oq`+=1YMyn$N90f4rg~_dJ^GO>WO6YmK+( z(s>Fo!SzK8a3ir%+QuSc*m!K999dn9H_M@?o3z(@(N2dx=)K5FGto7m$pOn)^Z8^d zOx65j5F}CKr)8ku-&>AOlkF7(c()jxdXC$V0Hzcjx4WFg+tbR~`_%deE!it~US7_O zn@YLE=bInQq9)BYq1^QxVcRR@RiW5}68&cyDSwvhuX=Ok6JjQ-?0VzkN`yBRTQZS# z7vW@bLCxUyteVN(-*|=up9K{XiR^v-ML(?U!?1^LQj(a<%P@1L4c)Y?yWT^DG|8zn zNllt!CwrwL>koQBwG|l6fOqwIVJ$~Xtt7yfTBYt1`b*gQVu_e<5o={X(y~9!Mas^q z^TrG|c4pal!rC;KnrFAe?mv0-&}`zPJFkMGy2R-%p^t_2;k$9&k zue9mkXX$sD!MVhh`FUs%VuJ@M`x!htRUqC)IBB5BvZ-t;FV%D<{w&KU#_e2i`c~fh zdxdJ3Oy^q8S?kLYwcR@?Guuiz<~92Mh(T2O#+_h5?8q}u5)dZE>^)_aRx>UEUZ8V#*B zH0KlRGO-PKt=5OToKg&1{%Mi6P);mex(8d zr^`X^=fq#>8v*$@&cgq(bMOoO90iqCQ2gDV{j62XnQq*6*gixV;2z$T2v2!pPkHE$ z%#tUkg^n^af?sx4hK3MI6fJ7RPmC4YB2g$StI=%kiLhCm_n@}cMuKz74rP|T+N^2V zILIWO(`=g^NI~5LHA_3)zLY+<1A>~3E#8}fdEB-YiceYa)RjcLG!%|SlYu;*Ul8hw z^5Dp`Z*krmiK=khO{B=0U4Lpf@mF;A_4R|9356)4++*xdH+XL$8uPYWiK_C{cA{R2 z&Si)Npk+RsO$qPSU~z7{n#js!TWi-!k~I=XHHs^|?<6cTp;6<#3dVh8ha|5Fh@Kui zX1{rpp5}AKGxAf8BkmQ!<@irh~XBY~$KDKuB}dJmK&S zEr)NIw2xm)&ny8ti`NxhtAEqY7V8)@xq$u71SRNb!hn+yE`Fg7_TqGDMK^meLD!a( zUWRqVcmVsus01&-vCodJ#q1B(3c66C<%?i88KoZ?NZ^}K3%;81;18;6RO_;N)lOnI zrs+&4vU8X_>B(C98U=dh1$D`d;MeLu9zjEMl&6J0B@~3cbjS-Wna|C0v(hln$fLL4 zC@^6W3T2#B{Nm^vN2fOPkqHTvzPJ0hda|C zLh&9?(7QF5Ft1VLUDs-;jl~-T=T_F|edZE5oDI}5jHsVw^*D3a_0g~51@d}z4~n2L z9l4EOn1$H}D37mZV_|C47O6#oTPvOLDHb}w`$*W)aY1xT+)meem;->b__F{wVRP`a zgDM*h@GaTI>_*lv&Y)!X)AKZJtbTRX2rkcjo^7N%K9XO^S2Q)_FRk&Re7Yb52oTR; zWwf_wFfua8H&z8J>#BI944dv!+XhDl^LK?}INkr29^}2OWb=`N1?k==B;`4zs->JG z()vhULCstG8H#83tCZd%my`aYq-T`%ug%J97x($r5Q?{+;BV!sMN;0GuF9I6``~Z%`q`5jVZ~hBVk_oy(>BnE&N!H{`-XzU&7@(sm#Xx;O_1_c;E0S+ zsa`r1u@2rUD<_Rc28mp~EyuM5A5O6AjE}vqEf-UkBJ8F#(!Z&U*4%Nimwhtt#IQ6G zV(-|kPm`~_V>d;%NSyQCBek{K(xsg=LWAxj;o~R z$cv_2h@XJ#jCV#NC;utOH}#@|ee8Z0_+P>0y*KCe4XWn|v3 zRiqV6TZz#JGY>G|pES&X@df$)z|vKQkY!&h4|RaKy&6NGEr_kT_Hv>UPfhB&)?jxx z`;`%knnj6L*@#!iWSO^JPh^$9Y{&N-qI0)^Bn&-rEhwXPGJe~YL{)xjD=DoHwI+(1 z-~x}fK{X%jVaJu;9L6?_Y}-kgs#DjIyv2RJ5ZB!;MW!6z8A1oJboY zG(~TKykv1V0KK*MF)GuxZA4X(d{ou^xwHhQdoO+QA*I)|?hLynauQ@3^LtG+Wr>j7 zjoVmqH@uoX{lVY4JD0P6HK9UZsG|m@ylznu%}|myZhJO~zt}34jvwR#oB2_WyrN+l zd&Nd&@%)Bm+|INqkQf`^ZJPbftpzJpC$8H4D2a^?8+Lz)vWyzEoD8|C)qlz`PGVre z9+fNMh%Tb@(t`@h$eB$28+KU+J+PF~GG|DFc{zB)%{IHz1s4?k$K9Z6l(_5lS%S^Z z+U)d+^QKKF?MoiF{~F%{skt$;M4T&Di({049T3`+Rq+A-I6jsN`;Js>|+#jsKRx zEw>EH2P1>B0iDO@a(ltgxM^_cra|_%E_1zF*BW;t``y6}G+O-i>lt>rp6V4c^N|*Y zeKv?ZWv{o0ef1ut#|aY0@Jv|)yf(ElqK2@|Z>P`KOS^1z0?IWybR*hz%X~1bAgKt# zGPlQ0pS%li;2<+}kYQgNQbzmR`q}xHLYC;_?zmrgpR~UZnP5QCXBa-~40l0b0b^4P z#-^Sgrdx{mFyu>75VnSNbPrNYF2WreYhlRnezdS9z7PoP^MDb$B& z$`>uFfa<-!c0eokrYDOFp72YN^7Qx^s4TuBI+&|=z)0`lF0L&Y((2D-t{l?h0>t$N zUH35bt=#sDiFGB_zan-qt0_IMr>|$bPujuE)BO)<+UVPyNXrJg2=DTIV-Cw zreR!{!lWDO?CDFep$yISNW|>G@-T@8O(zXO6YqU+vOwltGxjFt*a9jte^{4XwQ9Id6vTB$7dGv~1-?vMvLv)6dkMoOT(GF2L^>P;U9Nj$o;M4NNV5ikx zjwR2uo9MwD16)VoN274@T`UER|1DlxoKG_yiiKo$<|4X+r)e!yaJ^nS*T?4bHILLU z&d`q;EnM(w=8!R?#rxZwf{R$X)Mk8p&V#;cjd9{KJyT-c$_ma7v$I2vOC|gF6@U?x z;7VIeAH1A9rU~D{W*@A271$L7p%$o%L1rkStwHOK4D2CI)1}U0`anL3)OQ3;K6I+R zpU?Yn-O2@$BOjzlT{d==n%=B6{YA*{*p}acf~J@I-c;B$iuSYUtA@Rp{dZ~O?HL?{ zVEKV;*LOzLQt?`{sNk*Vt1nGqAD&};e`(oW41MG3{4Wa>HFAvA>0c_@XKsP-gT7mg zqH79#N{TjTuuBw-T9HodkzD%5%+e|JDMX_~=h&yNe@r7*Qztn`KCOFII@ekCZxSsi zeZJ&^6xj=|^(%q{CACa+53_M>k#1#B;#z|77Yv|Yx6c_^M|Uq_nQl(W=fa>@?Cx?G z4TNcVu&Ruh$6g0H%2VbeH~k9Nb;*Tv%^>W`;phLcRgAW#vnQ1(LMymS=F^MzL*X8y z3!l}WT3eF|%YUpR)T@7B7kzY|?ak>M7ZbB>&veN8oN{LUptlkRRc7c^F{rw_kbu<>I zn+Bf=uDofOI9xXkn-;kO(-}5Y0ZVnE>}4)1hgq9HSx}ZBmF#;Lo3%E*VGY%$>MPeU zi$WPxK1}DkIkim&D=|H4Y+jE;V~aWQP(}hmu9C~~Or+zz{sAiSgzjLAYI1GxKG{O8 zwqz*!oes{sP`4ee6sy0#v)|r|VK3&4cCb+Bg+m4R^yFVqJ~^GZPJP-D<_P2&UJc7lbOtoR^iwmZc)Z+VTtJSx2}m3nHYuCiOq}n;&$2^i@Bu zXuo!xa4Ru-^UxRiZ7WjfV=csaV!5=~fkco28)+NZZhk7Ha?zVryckOjqqft(@2h@w zTc|y1(-&q(jr7?ewiIOmRi(<;?N1eybEt9-96Y@?xR?6c`j};cx?)IOpo*WQRS+%wW^5UKzS`?KICYeP{FS8yx>u2%}&~_rZtv5t-Z$r&C z3@gm;x$<`J8u~iMq2ovJaZd$FT+zt*pkE7((2wqr_jE*Lo2aamX4Wg$gz(OPFOY|k z*YMIXeeX610j!ruheE8;Dy?Z_Z>$i9yG__aDEgR2Ll569E`?*3L>_SW;4jQauPtWs z=_2k{SatfCe6R+v6;4S*y|T->p>j32wm2*Ni$`m|ud|0fm4=F89?b=J2TL9pX*!uj z+IBA@lzGk94n7H)YhPz?golyL5>)np>%!#VURav|a_xnl3~lS~c?1y&^U?3@?sh(j z#^W8Oxp+}*zGQ+QrXvS+y?|L?)LZG|2wdpXE89LZoqoXOcQBr66`3q&Ej; z_iMfF_(+36*d}arJ8k^81W4*O6@5Ta@T-z;R^-jWh$J3|Vl-01CDu-KO5-IZyW!o+ z4P{Ar-sT{NMWIpH_DcBZgzTG!Dx7*c@G^Oc;Rx1s3!yge>+PwCK3I02jMb}bBP!Rd zLdAuO_f!_g;R>O8CysQ1@W8V@)JlT}q_OdO~W%hOrOxt{y@X zJEDm0PQ(=tpP1P!q3yp1Kv%$M$TcyEY8YVdZK_SP=`nv{IF7u^qd)L!2=o0**`j{i z^Ca9rPqh5J$ns9G<{!zk*i!X8PzC--rHz`Ty+MwPJNuK0s#Cf3soVA>=wk&?YbxX# zHw>_G}xqPhpL;hys{*cD59TrDD7Cx(|NUgYut_{U2m|J&aL!@iFz^qf)p0n zhm#$`9|7+lfgL;bgENMH3rsA*N-rEWcD!Z$*N|;{H2hyQH$Rs7$M4Wu=v+rRQX2 zsVBSDvZB&cr*HRZ_dCDWg`M;Jet*CJn2f`j8k&E12tMEh#W-)x2e*0bxOr|9F7{bkf3 zrBfA5N>Q46&P>LBW*m@lR)^D#2I|VUqoTm0qQKCbmSduk86Un?$u5K~jUl?qz%sO@ zu#CNRI-NNa5d@FT;H0ZmK==0td+DD2NtmjeLXd>k6+@dE>T9ChWUexuji6Hfl!a5y zq`^ucEaES!9#ZpJm6*0jF&Edr_WTn4Wc-HL>(Pz*x*~imuOytj_>!%#kzD5u%4rU^iDZLhNY` zjXMcfujT&E7Mzsq`Y8iOwhhUE8Ugucb>siof~cg0%&P~?bZSQu z{^`thxvB4Zk;BRZsqcsmWmF7haGUG-=RVNc9d6X}1hm;1us%ogfSd{m$SF;PN}7da z*MA)wX}>BKNo}{F9fnnu#ZEW8M!9>SfB2F_z6Af#<;Vecz`o{VAQFBX9r$gOZFGxF5SlL!&sDDB7#9u^X zCk@0-qB4W1v4!~<&_o+Ic>=aXjh~RTIx6o>#S$NBvsTEQqN_6pS7#2bP=;h~Y*F=1 z?_Twh(Zk)co^ZI)_@WFU(~jt-*nv&4<<7AywI_k;6*(>R@*~)KemW`g>7*irM!C4= zMbXi~z-Yj@fbI144E1BP;fgG^ah3ggw$f2WH*>)o{+Dr@S_)0en5@F{d9bO zir&&U0W}+SDs5p4e^+q3q}yRJh!NrdRS$2-E zjrozx;r5x7OI2lUz5!oI{G;7K5U{Ei^dHR6rf+D`d&-7C82dRTandXKfJfM^E7zt* zhjnyVM?&u$ThC9;sJb@BmLr?#UyAOxm!HsKq^=AGtcAqFWVq8dYx6i|lgQgl|5?gd z?RUi}7q9tV zrL=4UH+2nq8ATRioKN@Z8prY*ejB-P^s}X`lxmit)E^nt6td0#m;GYF$S8q`_rg=3N5eafS`$RoBdF| zeMg^7MDQ1EXyX}vwo`vJoo-Nq>tk`d$FeXTbyn+GR7b!fV11%zxH)Gy`{%KZZL)2t z_*di%#{!R|=R@?jE|J|%;~g7ck%IE{+{vD6xxMSA0c4Vmd)<`|i#)1Q`F8$Qo479e zgL2UiLTcq(Kc#28VFF6GAUZGvoKT*M94Hp9?Yu_yFK0Jz)$Rut6e`ia{XkE zmCM@ty87s;_xa`?5C$7Z!8}A?3%)?achaU$S;*x4cY^`g*iC3yF6wHg#7*4Mr56U4 zUPwhQQjXPTSsLv0CE%(vo!BnjiI9bBB2Vk45!-7_n(JyE;su6cZ08g zm!6-S)7I6R)7-qo8a+EWcy>_a%%|h_$?SQc2yH4H*i@(k%ryu4PzRqJjy}q?JPJcD zwfHEJ zf)v?)73XIWj477KF@a#FasHc8`Ts^#icKuo3rbDu|K?NH2x!V)0Zj=HrZor0-xowi zD0YDT7PkvD9ta1c%SdDyFp$Ejz%Cmd0Vjwx)@)~f%|4<$LrQ-AoTT&^9JsFVL$~`X z+osnFzHd8lH!fiX31|@7L^g{H3{}hxmSxf9B(j{iap*A>HToktUEwa2HXE2o15<)o zG6%Uh-2#fz4eTS(w--p{0?Lhc&*Mpbcq&l6qa7sDL8ifj+}hjKD3l;!n1<;X*GI6M z%*=<>Hm9+@t1ASSrp=iH=#Vpde@x{5n1%n>VH7Z%5H3b9k>DkQ=W*r>2tVN(bw@K} z1~OyDPM5=qOO?b9lGs#mr$Az|;3A7U#RE>U$Z8v^llj`)T8C!o+#ng_xYwJwG}wE;268_m1$OMx&SHMH6EN5@T?7bNtR?OwG-)1I@9_IUUR?@PXsPD`q0a zOzG)Z1$348{YoZM2~m;FR(L5c$&F76$#V3wxWUij{0XWSIQ_7@lk31{!cnj}qVm{? zJa)Q3Tf!p+S}eHQ&?V<)V_qMC7vn!1E4v*^Pftw56JxBMj>qD>;%ueopz#pi=8*BQMzE_8DoJnMq59G{1IAA*vh zRuG`ZXi2VG@laso4B%3EYyBb+G@p{E{||zu`+pELmzyLCwmyWGr+nX^?94IHBU*h> zMGF2kOLwR>2m5*fqx5g@wFy~mSh+%6GzY!LfVf%d(fu2_DK3ztCH=|U-0-mOPuiw| z+UUP=n~ic(!Aqq{o~y7;@uzTU{(rDGnZy52)F#Q+hNb_1P@4xRsb=Jpo8TH&rFSGP?c4zDxY!JAcN5&cFUgd87OI-`*X4;VlXA#?R zQ|dTPJ2U@8IZ10X_Q~L3FnPOKU^~-luW~qF`0pIf*Ns|Tb2kR;ZX`xdB;GTmTluX3 z;j9n-8^Y<0(~WT_t9)I?UY)51g&LK__*rT#IH5-jXUWoxm2M=(Uf zQQ6vupDML5xGN1;U?Ybg86hsk$}e=4V5Wotxr99)aPJ9>?b1JqtO^Vwaj~)&1%*Du zO?KhOS~$(+W50{&)_$M3M7cDnE`ZGWu~&^;UHGAzo_3FNi2|ReyNd}ULbr#=^;bfO z$n9?P6`W;M^L=gozHU!>N{VYaQBFLLcI$cbR@IvLKiQf8p{- zDG)7i4-Z`)1C#oO4rtYrs7E2PW>bqaHQR~F4cIwH{)T-5msEJmZQXsi@EcFZs7*|7 z;TthnD!F!wkl?p+sO(}9(=-ObT%aC?{8%Cghj|y)ki79em!X zssjlc=tTns1}uvWbCo|mn> z44Lkn)?hP=nR)1mSmiC6sl*hql0|uB_A=H~R(0$}-OB5-On|I*8g1-?<)+b36nNog zzp;Ty6zuZz=2$>n?ZaZ-GoAOcql}PSjVQ+|7zp9u?1hUODHx*VE9E9jE1QKvv@^>T z?rQ{+YE*5MF^;rHF2*vPsM(kHG~S(9YJ66d=Qe)ABxPTxDi4SI{LSKZ%)|W6to%_S z?)qA3s^IjksjjKREA&gu%Os*CEkz3A`zREMP97M~QR9Z)L4b zsBn$h!O;`$U^a^NYbw0;L*-duz$^`P1bQH%^$k@c#sZOLNYfHEEtX6ri^Wqt5bBT#suYV1Wn)D#vW3Yd8Bb=Z9KhqhYiHJu643 zi*}-nx40Fw+tA`aYX|{@==Ut_P(m(L=L)s%U$wt8zwp9C`1V!w^ ze@cjzv1q%rnzRDDGzn#G!B4C)Fcd&djiNH4NJ-%Q+jnu&M~r z^0o$#D(O!~Xb?Wo9_p1LQl6P8xghH6@eNAmlt6F_>NL{mQR7DS!r^GBe|IugX5qF# z56-@TjSct_IOTsCXa(+AT)GMD)<77?udR7#0e4@r{2HAAj$OI(AJUPp^$#b@tB65^ z6fk8qte*RSL`cmv+`QsutHIRN)n`FMcrV;(Og+A7SH|HnX${$B+`a_YS`q)U%oSwB zllCNh^|DxY>|J+lKM07J6fq(|=jZdSR0IP(NS$s-FrYFGndhufvvG+GV!$BX2~`I$ zMD$i_KpokK5wk1o>x3MkE0#uOD_3p!)nG7r;HX61mYYQ(|Jg9uLP`wjvx-Ve!Wn{p z3NlZWPt{M#Km=<_JG$Ngc{iY8AiAO2vwoqWPd0?cKB#5zVi1CAJ!FZdx(`9NQS2o*qvkx~|)U_=bB zBv#G7R=@Ajbd;<;;CXl9>SnewH;VTQNut3DG2_Pj1t&_NvcZ{&A~-GzLt0!a&|$6E zVE49li6~zz!NarwKwOfo<02L(m+2SrOynlz@h~HLl%@jK~Mq=Yc|SL((z#h-?$KaPiqv`7PyvhkRWtN z8JYg1W}Ez3X|r@FC5_3e(;f!|t~JnPJf#EfYD|75ZUQbl(C7~v%f!zh(&;TCDboS4 z7xDLp4VJ@%`102nF=r}V(P(0U76jU`_$KZOhcQ?Gk+P^IKJCu^M$M#O9K>+ZNpCft z>#7Pv9H5qjnHRr%U%3?C`cC$t;kP$3vnC7hVwBbaFNPHkhB(dfik+-vn!PSw^71dw zN)-GO$>n-6{LjB|Oh4+6HhxrfilPS4^+g|xts3_t&+rGHCB^h<)$B!##sYb1F1I+5 z;gvb0{;Lo-tYkFG9OJ!=LYZNfB^bL)7fK~R>Hoso7VugrUncdwRxvRxt{d$k0n%e zs!8?GF3B8mEq^kB`K)}mk(d@GK!mtB0V%4cXp?$llRBw10MZKNd;B>}w&8Jy$?Wj6NMGt58S3rrvYV{2+CLhUYWqAlaN@`MfL^u+|y zA|F%;@YI}<%L*Xy!#Vmjg3xpgzyWApnS_7PvF}X4fAPY9jXeN=SWw|rLMNwD8b8tp z6_)kt(o5hc8UMn#xj{AaR>7}Qf73$;(!|WfXqMWPrAC_<_mkJ+NumAYnDwfnP(M02 zG*g|DLRwr|YVkzIS@?@$@KZ&pgmIly2wKftyWsx*gpGbFrC&;Qs3Ia;sKsKd<;oID zzn6|&Z*Cj6LCyVDnzn>2m&lAdRV_9$@V&gis<7Y(t!*>~E{qX%D){Q5zE%uMJ}yg| zD&Q?4;RkG;vQlU({mtAWR(G{^VF+_Suz}w*_esS;<43E%ibv$v!MESm+`kplG0e=M zp?hF2>GHt_UW4u++{UoE_G6TXZ5pVF=Y|$BZ)u;=FysH^ck@S*L?#=bqerW)vyZ=N zJa;s;^qH8e$nGidhtJVrH;uW{VcYXO+HX4)0J`OlJh^c&{@C+EHY`wUR?MCS&fL<$ zc!R0aA3>cARp%9-+!Sy0o^6OnZYrIj`{JR-hWNti@sYOSF6h^XIaX)3$F?KPTyR5P$%CVO+&{fMV1|G%890@T>ZL|o|1$~$uI*h zyT`|=uEO=XtdtG2G1qQxV#H~FPG#lbENlM@;MHI=m5-|^BFJ8yH(n)J0u7!u zJ>Cx@M%Lf}uRh0zNr!qfz=w0exw-ic{q+)aQ-Ud-^*mjmWewr}AmpNcO9!+!U4Fh( z=0FAsHfL=;QxQ~Vv$l7VJ3grVL!1h24j(RMgTjP;x>PGXZGCgJ#V@~tY}7nTZI&RmNmf+ z_@s;uC5]CI_^Ia6e)9PPFO>?%k_+I#|=fAoHtFtO59gOIbFjRlhmGgo&otNZ}5 zaPy@0TD|7>fsfdh%Z%$1jOZY^!N=-YIbQblC02Wh{U2HI(X->%o*kcF>8UP226wpM z;Gv6^=|HN$$=TA289EI+OiNaSp11|Aip7xb_0*Q5FmsBi4N_?olb7*-99ssK-riX59%kXmLLApYc zgxjD1Aqrg5L@er$O@lt8Lx(OpWD|z!w8DSqvp1tn#VcT5vsAP7!7>2+{+hL=Ocyhp*{2;mULQKX?Qp+vo7B$6M zys;bGVvS9RD*iW~S>L4UdJH(+AVgh$tl92n-lV4$vwv2Mv1}QQFuHZYZ}^;w1#y!W zsafEMxs7VlB>sj)kHbEN(SE2_1cE(%z5FCK3vw{yar+UFfHxJ!TgO=3XPm{bo)pDQ zAC&ywb>hKn-^fUkuM4}aEW?n)fF8q`RV6IBQ~o*$EV;m~RHbZ`8JgNqw@JdHLs_SO zTzGkIXL81#tVTc9e*qj+NJWoLC1LN>bRSmc=p(@T*y??;-o*-pupAB=mN7UIxZP=a zG90ib`hF@2K<(N+iN%}aHYLeVDFFIu>IhUL*H`m?#=Xo%XfsR+iBrO<*n0Q*z)>~y zXyf3N;~FG5jvc)5S{94L{><_rxC*lj{7gOzOQB|j5P(sR)cT)z$ZXci(lVyoU0GQ6 z05S`43z$NvYZ~a^SONZZv$~3lB{>^Tt=>gVDg5X=4Hw1UFC^9U2KmT~NOax3OAWNl zd#Y4B2mtWuV~<4P<{gytzhi|1=Or;*N~DGN$gkRNJn#;nIa~+ZjT0yk^FP&%9!fuF zT@khMlx1%+_bE%d2kumx2Gm+2rB~!e8|CSuLQ1O=iXM}Tr%ANH&~QbWoIC}QsGqAM zQ`9mc0i0&3iRIV_B%43Xjx5#XU~AeH&hFz!6r54bw9%gu>5ZB_3w)FIOf2^tzg9i^ z2B{sL%6y}}FH;0c&2dEoESa-<>C{a7RJBYXb}?MVJ(*0RWT*DFOzs2zKlGY^DL}7z z-SeY(%auWvSt73eBFWs2gJ5r0|3{kWYO*yt98Gq6i;G*Im02doe)nO1^eN*~)apJe zER)k$bbPYJ_&kRID~aVLhEZg^g_Q9w?l0{Bt+ZQ53|B+OTbK4S1>zj$EpxhDy;=YA zcnetMT6IB*jJb#tS&%GOCi+t5d@Z$XiXYU-7Og~3n=BRVeTO^GnYZ#d_fDF>*X%MJ z{d@vODB~2j=+DMMU3a+Em z7g+3e2hJC|6O7xnL^oe)u1_nN8h7M)(fi3sLY5TJuJrw61+Wh`XT^1@Mo%1@n`JoA zZthoPevquViZ=>DC4M3)j^1h_`NxuyaN*^`$7TLW)tD5Q@;O(oqKn2Mjl+tS3Y0SV z%2!_)`7$k^OEY=TLEmiU-c~co`i-)+xyd!ZjfY4Cw`pHkJ3qBOPDQ^+2Tj$HQ+X#; zFX&F?a)(vkZ!H0pa)-&Fqyv(c&N#2pvpcgQnY&ZD`r}l7XNEC_`QX*X>Xq0{4gJGY zddl*@c_;U&Qf%g)Z*9#ttZDC@UOU>+0CC`gSHLE)SlXTO6{Ailzu$L)37fb^sicc z)%;b&83`ock0+MMt=;6EQb+(}HB*^;J8? zt5q^4e?(#W0tF%Nx@2RM>S{s~S1IUt%K2kiBOeyG5W?NtXem!aM59Q)*b;CTYo^yg6HC2Pk&~`2O4cA{GNKFFQ3&}WK_ZyakLX>_fMqd6H${gNYZtS&mF z1wo7cB-aU?VY`4DKBV1sNNZ1uX}mvfc3p!i?(0KZ?vQpw!tLjhz~K@oM!ZFybQNM0 z(vIfQh-t%>S7&{7v**Id%RNI{KyhjYZeGNioQpsuF*201r{QItin#X(vqQ+lNaF+0fIB z%gc(~x%3}X6nXh8`xCx4X|*P;uBizQfu?Zt9G21roTX;m=p%(mRC^rwXg5h^#gI&KJEY)aBxJD$tZu(mk^p~a;)cV8*RwU* z8q+pqiiKGTPuz9`tV@Q?4|Wy+pW1<9K9rBWSb5+|L}peX)ZGGDKS))+E^eZ(bB1rA zuNw~x`h+a3`Q51Pbp)`g?rZN}L4Uhew4#!}PmyE4TUXr$JA4SWWXlTM3M9^P;_l6T!G z6SZJ(id6%o{eW34vj9eQocFpgQm|;4(eyM?XTNrFVmbBc#~Kl4>VgY9JZkkxu?SE$J^ZkY(wP zEXSnmW^~ZNO3Aim*(PN-26}))=t49>+kegNw*|naz?fz z%Z1;|?#)IQqrX$)mStdpMHf3GUm919JuK8EJ)PWFSuh5V7-0;4OzHXKfLzv;x1?*2oZCO8=o+Hj z$n9b!>xeIUURiox$vronmQ&^}vYZz3+PB?60%Q(7IC`St$prmcWzElt1yYXhNf>-V z`Rp66yJG1sH@(R~ibpaVA5j+ERHQ&#J}>9H_jHz~CR9R%=2IpRMJyd2}^)QkT^8PNs*|q_|X4 z^+d*f=|-vkUY03U4vOl0`bStHgKia3B7^SIAmI`uOtEG;&r8&rLcpknu3ay9z$l+F zUS)(^Au(-|FaoSvkV-*!78wG?C59=tuS^(IXbMrawv{hO*K6?AuwEk*Qut^(jIZ55 zlLWu|ME@T9=oXDCUl11A=`FNyiv~kL0Hy84`S!q#ne7SwlOs=Qe`M8)QM>8W@pQV{ zIXlHrw$dJqR}5nnzLCWpZm)V{=c8x ztSKrd-@R(ODEZ?lk7o^w-!tQfH`rej<0)0Pd`IreL-^*Sn}tcvvY#E%epsVap)3^Y zZld40=r3kWr+;$`rT2d)%o&(CX2RG=b3RSgg|Ij@2{NZBA_4qP%Te*yhrssd>5w_8 z3KFR9K$rfHoRN=dC}HG++{}!}oG{;kDS3`A^Lfqe^XBI@n3s3Wnm;_dQXfdfY}}!P zzQc)7MLRcL58W4%tS-}YYeUazkSCjYE&Z=+iG9vZana{A+;f^g*q8pG#TrXLIcMX_+F?$8<}1ko?+SnSYT69fSv za|@wWK(%*+VRQeoqT3o`l29Dm!_u!S0K2Co!GvacNBl}ZG0B$gxL9E_3bWj zM1%aUAP-d_|Kp<3!<*k=5`5E`WRiXHC7+(*muwJuBHAk&u;sm9Ywf({zv7vXv}dJypN@GJ6>U-x{cIk-OrOnT%e#7LIK_?CjiJH56PS643dUv_t9f++Gt@)W4(vdETGLTA;#z zxk}a;Xw0ECsn)4I7}e%lfG8X7rrFM74f!0wV>8gWw~zf!|WO-<2o%)&v)fI5eIEsxvYx*eFFD&CC$&BV))RIcdc~wFGKsIRQ4SEGuD>M7(O?BdL z0uNd4tj!c%3&wMXMG{{$3k2%(r)%jxh3U@vMSrcer;*GT>kXj9K|kcSI*Rg%{?ugc z>yt?%;6Y-%~xtROs?rhj%RD1 zAIrL#zP^wTPGvk~9Z^@DSVI)0f!={uy5%}ia~3@{fkoeIcs&8NIc26e%f1l_iYGP^ zM+2k)u-HuZ1^aO+ZpShv%f6O~D~5+isc()w7$aY6HrlZx&J*5t{f<=Z`5;crhGCZO ztflj%n*8977^?tc^`j?N-PQ*sT<^e^809tgUx=hU`YSp6D>1lXN6m(Sd0tU26Z{pp ztza|vV1i7jhtfy$McqBL%Pgw3$h{E}Qkqe2jQcCLe7us+Pj%$$yT zhd-D-7oFEIDlbBkL6El3^Hf9<81*&wSO%o;D9fB=8TzeMmG8&~dzXr>+Tsn7F=(;iVK|m7Z##uJ5yLz?AXP#U`=;~x0>f2)$_T9n< zci}Rs#36{XVmk{sC=$+dosh=RpC+W04vR!+D21A{XZpK)>Eiixn~eB9udI~{pXXmB zrRwVE7v<%{MJvj69%99*fUpHn2=DWUX2;Ps$9LHExvD1tDy5Ku% z#WJ@B!#2wOhLYjkvBF0N0iRO_+BGqXK^yE6u|*<<>fMOVJI-^9w+@;q2Z2eyp-K8poWCW6%8-4$g#wu%}&xc9Ez2s1B!=#@9K!PTp=)220Q3C^JHyEP29ajPCO|V`)a~e zp7@z4W_050&^-!KV@TURZf$QYsnulTr&lljpJVJlgCDF?Kn7{I9}36o<^t5%>W3`( z$qAyW0C!V($i9|g@E#eQ&#OlhHU_7a5fl3p(O9+=TQd!`$v*m8GkTgcXk9Z4Yav~e zET2BODo)hfmSVHR+PsiNRm@2hla8Z-l>2OmQf>0rhdFT)ByCl`_I7%xM+W?h&v<4n zFWlNOjJiTNyn2zN+TYO~#$bxkDdb-&e=%~iFsR&wVGP4z^p05j9kDpbxpvxn69n{c z6r~jnuVPRS-z77JpgNjViA9>vFk?6_79Z}$Kaa<9#=e}v zJ%KJ|UbZ9u=yE2t&9^jL0MIo0HpoPw{*gD=u4abZxB~%$s1`2p4UDL!n@X-nl%s|c z@9<_uuc6``T-)**4CPs4F&GWlU}et_?7FBGE}b<5HTyo{oMN1PK%$y~F+RJLnYwX1 zgY!K|nDXIOB)ToezAXkLQ=Z+Ohr2ZY&?AcUYF`K-i#Cik2*@|F+ZCdPfR z)Yon5m?mmN7+RxvYGh{|m!2oL;C_==V_vgtW&lc|Pj50{GAR?VDE?iq39tR<2#e3L z$1Fp5V;h7lj@Z_)ieK_mbxxKtt;c4#fY#H7jgUje<;+jo{w~^@GV*A}5)p4}TKsY54S~dDv_mr*JIyk^`-p z5_n%b?we}B`yv?q{v3886Uzmf+U7o-rJ*O-$;k8*QlEwVcp>Qp7pHXXzl6~!hDoUz zW*N(>SW#(+MpjYe;rP1zox>XlP7|)m3Sn+v6I-w*HVHrrOs4L`Fn8SDf@mUoE>%&4 z=27$Lfwk*}!Qo#-Qz=~JYZzRqzEBT0CtZdbQ}5`MThZ$G4bFk8p2g1tfA(D878#iUruBJ(Wi?gYlmaW3_M^pjwW7ERt*&xH$#Ig z)d0O)kB^Awy#dcFj8%8W72FvIPH&A-Fq4RPiH{C%p@7(Cg-{NxK~AB?P;J@0jpC>j zh!}WzxEAN(N5*5^7{X8t1R(MpcRj4c)o>$WW<=L8@2p`|;F}<5m^R<_@ET@vS$72V zu10HiAoBu}wZM%j=y^zk6tR*pHL&1iB4@~#O+wX~2;6B2xe7=7BxLlWP zoA#IMj=fE7Edm#H0IV#97cJ#hM#2{3iWR4crsvYW^;X2A1_yk&(?V!q(ymr*{5EaI zwsa2wUY*@woz(|TkN2}s;F$B~(sv?&IcUGRMH{TCson+&3jSBUEpu0hHP!g}8;49M zJ#;u(`}dkL+Qjl+I&FZa&9OZrbsh{>~_JKRFsoo3E!x>Q24;1jl7>n+MA z^ojch=Ms9fmkuu^CVZRNYaSt_Uf;a|Pmn1q;c{niU(Mp)+{#J3*syI_Z~CQEUwEtU zadcP)7bw-A)ZVK7*vCD-jo)A9`M`8z7yr%rXw|JPRk!Nj2y=(p6$0JOqn&*leYAo_ z_K<%4Afuoocj38S?d7Iz-drt&5Shm5#!`sr+VX-g=Xzc>e|{^}d-Sr0&i5gBh3;AgEak`- z+=i`5xCp?`_fy}N17C70e4K!~PSplc36 zGIDbxkVey8#SisUEpV=WsI+D@$}op_ADd0BE2wp=LWngE@YW#dSGS_jzIPy9pvwU< z>xT|j66Aj>J@CqOtb#hARY%7ky#Qz@(e8iy0TM`gjznI45O$E_@;TuQ{+TV9T*0&3 zqJm_#AHs$y{UTiRS8h>-0K9+(^x?2Ej)XG1*MZU8;^!3WIs0-5|69c@bI!hEWh#ax zOrw5$T=|nJZnBi7?qF_`+{j(K6)~_M2`vGmc&L`6&4AUZpF-gtBsH5@G0+VW4c)s+ z@~fy^z+NDoYnuR3er$l*LaHhsC58icqmTKz<;xpMx%_n*H1V~bgIfdf)T@%fBsJaF$WCqd5)@ejt&(iB4CQ71h!o%?VA3AAYU8|ZJ-srS~5UziR2Z%sNZ zhl631-Iy$qza{Nf5@8wyjcpgC>ezOlEaZMz;k?<=>`uWzRs`?^AyB9QMY^db?ZwNTWn>R!@lY-9X=4Aqq748i{|6$nyO*5>U2z zm(rMgk*~?LRk~)caisr~u8&awqaR9SLfAEP3uecR+nFD^`&(pK4}?cPO@c5z6!bUK zknG5~V@q*Yn%`|5H9|9c`!*^zqjBF;eSp5LiuKJH7>uO{S^DEnQB@y( zZs$zYuclA0t+MCYAsB>FALh4W?p-OfDjFaW=#!oJdpG`e{rPv^pMSfz2gshA$#ibL ztmrb6UhS}zRobxm&Ubik0N}JU^WN!nbLBA$pC?CyuYD$escAB*SIwRN9GieU1TQ|1 z);-MqAk|0a@gGTw0FyZ-ZKDZWc@Ns!e+3rRm&EOItbp9HB=@9Z@Hksxy+M~5o! z64~>aRBk&fh{=rMj8@aLI~hBazd5q3+&V~v9newQ8%+AwrL>71D85V7*lE0IF0bO- z@8Yi1vMUo+bVU_cJ6lshHLcoP?&5EsO?=#k59wc8AklFUAIZX9SlQhS?2e(jTkp7w%NkcY?k;+8*4Jkj60hE}UNHvj=5-H~ z5`M`|$MRU%sf6mBHgh0TR>O>UeYg;&m`JzYxeZF>4_hp^Mfj}&?eo&ijnC?R4Jh@u z0-~FTGO*V17t{HLC>xj7?mE9vST}ENXVC$nUvDYBSQkCNkUzhWW{=xeps$?olE(92 zx7U9`44!|&=hU#99D540nmunsGL073K>r+AgKLFL+Gz>KMue~s`bQ1!Zy+Ju#(i{3 z0@qQapEpjD%-lxojbPL^&tvj4?xB?p^s@?jdqn0$IeJJ;3}OS)e~?WzbsW9#HhO$9 zX*LhZ<*SwqE1jf#b=5YD2EhZCIZ%;4ShbCL7jVBT9pFqx?(+U7)>~SLF@~O~aU;$! z#|O+nzRlu<|2ArSdcgMdOq&ZfW@~P)(+MmR(lzqY`=yJN71uGgc#~m)_^&5M3FV&C zEot}XENSnx(rL+X!nSu1+e~C8+kmOFIy`&Dhp@AczDJbrF|#TrS#~8J*6wPiLO}FVQk9 zzx>|lTV&B&WZP~)MreOU^i9(KCK=j2dM~);c8dd^G%NQ93%m8AcZu^65xlpr9~*f6 z*gU7*2{A5A$j&^AGanDM+I={d`1k`jgEz2QI0sGmICdNuXY?C(<2S6hTcGjv9M;)$ zEeMbS1S2m#m=`bp6l$Pw?_%F#`t)(`Em<@tW>HRzxJy3DS4uq(h+Oulh7F{yGF2SWZ(<|g#QN8%+L5c^w9#}n`lR%}C#9mWmH%l=X# zD(ZaPaYz{&Uwc4X=QF*POpCTsl{{MO!pHPKu!z%d7N%TsFRrM6~=|zKkD99O^PgdRjx(Q2~nA>H-3~UZ*G3wB-Z>N(h1;cc2vZyc2eXJop%0qaD2}1~42zMLyKR|!f>gPSk zKd$3uKSr&|m;gTBOdn0=+mf{x^m=qZ?|O`TCj|{;b%zSUkN+Jhu$d@w_Yb`n%CZyx}-?K?e- zr7ZVaipbDoygLbNMEJnMOjwB5bj+~4Nz!3(XhFRT6I#E1*Bz?9kdTx>SLj6kEdBBn zDo0-e*$;kYU8sk&JlNy z&UmN7hwz#(_vM}R%L%~S`swkTWQJB~YWCud2P4L3Qd=Po#$tM@!8awB#V1E1gU_O7 zLZdNznYz7Bw@k{#p_)=dcF0ycI*g*kPPd-_A@?=VmB9=Y%&;&mACP=1EasBlnFF z$NT#yCbvLSje3sY^uo>5;Wut!yw}YY;RECjFs<0I|8}Dwbq`?4hs{GD-8ZGMo5-4h z|4;h{`-rq&?sy{YGuW2XU=k>?t$^S!MJPd=mQIl0&l>uIezmF@vL$~Lec^GgV|mv) z?Un=Dtn|&#$m!5w;_bhOHaw!8o5Zg*tho!<^K*%rMd`TtzMm=TGVnVlF`t4zgYLlJ zx9=IR>hSktP0jVZkkV~$XPzh^Qx-qrCg_KJLVEiaD`0K&H4Sa#JDz-E23gb+`_O@7 z+1k+zV%Ysw7o;s#H+Ov(=eddGnydS8Mdt1j6&PA7R}kw!sIyD^hP36U z1pSqlxIDaltvj8@Dl;Bt?lrwJDSGVjCyqTXst>-@V9ZFHU-(IaYtEeptof{t9A7I? zL#)bT&j-pOtvNHd3#?|kV0z?20`HXY@0bjhzOLXHrkl@xl;mjY&1DJM=V1r>m|omM z7o^i4m$PsuQ?+c-Jw%gl3@}qTlZJ}u>OISqcFX5dT9HujrWUZjMCLkO5gdq6LJx3@ ziy)#7P-7jul3z=esmA`o=7hqE1W~0=4vmYJX*s!*h${Q&x=?C!OWPP1dR8axg%ZuR zCc$FHpf`xb+38;+c?QGNjCut`bAd3|)Z^NQpWU*7$fte1f!KCLpJx91*XgI3$Y=6J z@5Q=4l&8B~ZuGPr)|FFf#fN~eX+JR1!K1LzeKwgC=$%L98tOCCAB`eYi+;{QAs?vR zM6&GmqjuX-V&%Q5=8JNQkl@Td?K22@%Hltl^X44w4|3MzY33i<3|9$FxCS%#a;a#h z-#$2rFUx{%%p|*ypNkbO_vj@0P70TvHu8c9K_l)h+?5P|&*L&f4|q7KM?g4hID!@0 z_H_EL5xAg3`()VKX7lCgxWD+s1W|vs_Q4~KDo=vY&v3Q((?JDE(TcpcwIdH)f18w;cK`BR z(Tf)Cij6|tG{5OFL|pvzT*^b-<>#W?iE=x?W`0@{*l06N~k%SzIn5-=DlBIl->*Ncq2G-E;yb$YY1-&uc55Xr zOVM)E-{smrB_e#bLrck z$np@UMQNe=6?u8S_C8J00+Y)L(3@$)(Sk^}DBLr8H@*2OE|yo%w+|mB?Y#AEvUun> z&MFaXq&enz1Y6K}iE6QJad*pNt?*Cn8Y{ksl#<1nRjev3gd{J{jF&J@lkijgmuPX1 zUidRS_KhOpyZl3H{H_j}$cwQ>&lb1Xh413$EY_XoIN=d-5;g~{o9X5ybcl?6=Xz@! z&a1hfB6_CTC4Y;~mC{%hvK6LUdNi5qC?}!)Lrg%DsZm|-*M58`z!l8RJAd;4`aFm~ zgA_hLk6U|>_!gW3nzaDH_6I8<5A-n1^fd;n8%IlrYQp z_nDIwVt&y!^b36v`fe$NM=x~)Keiy(u42UcK?S3nY?PyV@9=hJSD<>2+Po0>!TmZD zg>;Q!5DkICch(SHt$!&Hp_sQftq>Y{jZjqk1hGllApR((C3Mdby8rLoYY*~gP9WRzKk15cNZ|dM*`I}`L zv54d7zW2s#WXgWxY2i$#+HYMxb=Tcytwi7g(t6!yH(`Ornzg;iGJ!y8DfB)r~jHLLSo2IPeSPW(SC*9 zU0}0oD`naTKd{`-(wQb4H$`Gqj@6h~xN1jc#8jw`<=>Q;ev;gx-p?Jfw;WA`DxlX# z$5n8TYni?DM+u^UO77F{m+UC8-zgZ0={hN@P5ieP=xtK1VLxd!={_9u-_W;Jrf+5b z(|p}CG?()Xu%&@8U4BL-FsK7fT(Md!l@HZHVpf9$(O)kyiY1xmXJmA3Dpkq&iL$^Q zW*Hn0iQ-4;tt_wyT%Nt)L&n^h%k9X^!1b6wPfpw;&1&tATrUPDa^l26(e!82ceg@^#QCQcNFJipwRd_`yT8 zU1ze4mwaSS;}n3{z#+J2s>t8D1_#h2E;Fy;1fWY;huO( z)(k5xmA*)KPef+evNl({AVX;3C@*Wl2+H|)R;7{kr|&ww|MWAbUpW2L=@-tdHk`iy z%$n0j@Sj7cpFMrx^x-qNoIb*+-8bO@&z@O#`rzru&TKe+q@bO2p1v24dExYv`18Q& z=kdP-XKun{Zan?`nVXsVq+R&;(3zVgm1GC=zX*F1=%&iG?f=;&f$TJSlI$kCY1)RJ zv`Gh=v`L$^4N#i4Ne4PW2SCP?CTSb!MA8;o2CWDPC<+SBNL4t1f}-Mtc&v(w103}@ zk9ZC$4mg5y{a?H2`@Y}$fA9L%%7Q|Zon#NsexCcjuHR) zJ*U>4TF0zY3;5SDsYG_<)K+Q2w8u|AEZ{AUojiz*K5+5~V=eI!*`AXRO1F~-r6-x| zS3i34v6F`cyxBia9>d!m!KVIk>N5Q6-jfgGtsgFd@%YxoSwy-`acUFZ;ot1jL|{6Z z_kEY1{)zjEdiG5)D`uEyyjrpaB;!R}%Hou%bG!bDz>&T?zN;ErL(0~WJASAli}>3r zBlgi9{NA?^lyz0nh?&tUUmZ>-y`zKP6O6t)hmZm5;MLh5B+R*l{LH+6j_YOjjAWm! zvP|fh%P=wvol`b=F!f@m#*+ z(>!>*ZK#an8M*CIuHzr}XX9-e|6;gS{2sH(%yr4QcM=ezl^z=RDcGTgIop%c9?e|| zicQ2!N^=m>3o;)-W(7qquC^vnYW&EY^@|J+f^(Jc87kkS?dyy{r{A7PB4d^BU3}2W z_kdf5Ux2>04Ws3Ee}_QpkE!-Y6|^R?1SlEzc1bCuDgeh&`F0*{%d!Oa| zIq{{E>n773Eayt3Z%Y|bvq-^WrK&t3tkQF%3eAfA6xj$4B+5e47GQb|z}r&n7IrBHZ> zlurCz%%>GuPs#UxLxlM$r2=Sj5M;aa=P`&SR7r2ac{N#SC`$(Ap*)#B?!{#-s`*L6mS(3V zTVBkbq4qZs0KPEN$m4A=epwEjp68qNt(&<);c(gVD62L_8Ix9!ZqTmDUiwksGENX{*05Kf&7X{!qyMmJY~BXGx)bge%yNM2!j9Sddo zfNkzAh3Q=e6W`cEW&oA;tzron$>aE|5cs|Vn(h#=ip2iHODDXvvd1DW-0}rsKXSfnJ6BZ93UT|I^(58gN`;@TTOW3+Y zdW4IE!dW=BK(=)OiqztH>I6{~%db3qyS2*+f3sX)SOBsel=fVFibw>-(Xfx*EFZX8 z&J1D#1?f{!etw?mBr=AEBm+Z|S$G$5Es9N{p@35cSBkB8>@{ihH7WD49thK7u*U-y z@57MusNyAg`C|*1oeLPNOeSJGr2`?U3bHb0LmR-q;06^nzsly5&0nJ;657 zei^$=GH{t>_)X^G97r;X)hI7DCK>tv2sj4x5+>k$R|A%Is0?YVXP60lrYwQZi)+;2ZAKbP< z>|lJSR)7cV8v9)`^t;6LpV`leyB?peaBP=qXqUx%vvlzPV0B)mL6Zz_mA>#e+4H|)o%}zj{!dyb*P;Cj)~V2v8uo_2 zSe@Y){ufr~8~+bhr;YsIQJt9>^vw8D&{THti*(v?0LUz~J%a(7hJ~9>DgsI`K{&8l zCt^8WczUSEGC)&x!}rXds|RMa$W`E8@A)q{=djK;Iu7SVFt_Og5zhH>_alr#?7noj zid0S`Z>qgW<@|wJAOp3NskGd`b0%>-!2LtV>>melYA%8}htk1JGvf>a>q|Por2S72 zr*@o^+M~I7oW-g9i^XZ#^WQ8^t6HQpcAFk!Y|LmTOdTL2U^M|+X7XPgP6t{!?!P#k zcL#(5b7Kd3#zHmFm~eD^$*i7E5m;LBC#I~W$!lJ;W|3FseT3;1S(BCvQ=0Z7IL|UovdSE&2&ZZK*_70k1+ATS zRHmWy3Uq>*$Lvn|3q0wYj#r3yKNPcJ{bvPNwo{L_UjnT!5ycu_5GK$sL^ ze1RiDv?XBaXAaL0k)1`2L*ZVC|Hi!*uq-k2V2Ok?Eb*L-vB-2G==pwuo)4-#w0`kw zi~tIz3d(#SbCB*=19fOjbUe+f(J)HuSOQ~70F`ZwXMW*DQM%aOg_5<<>E@Y=y8dVs z5!sMt;+3<2bBHp(x-D_3=}hXmDRI6jZ2K6;oZ}b^CjeeuXoH&C?dBMShW6vvogu(8m; zzK|ZQD7OnbfCXBQg2@Zk!dml}5}x@12N32>{ke<@;S6PP&nskr*T>s0pM*f0zRok^ zK4%pp-=@?zl|uttSWx$`zYv!E0y*)B?98a~(#iHZg6L)u;>j-pjYmV@-ifAPup`2} zuFPbXeKQZ#vySTNj;7plM_!d9ue{S)?`+5of`1KRtqsR2)8Wi2#(k_X3zN!mS~k$j zKrMmd26T3A2cDGUs>;FtCj{F?((6fG1F7;eNB+J6#0c9$)v7@nU&g{DC z`Av2H@`}m^TT7MS@Ao!b(&V+(XH7kQN(Q$!w>-Y&kQ{Tkig-mXFy8!Vig7ZlKuIIQ z?-H!%YO8D{QMn{gTSSqA>3VDcdaw@!{){L3r=77pg~Op#jWaCiz}$K#eK zkolHs?rw#(LwS+8`FGaJC}jmXg5$@e41ENEO~{j&VgxUua5SR)i+hO(LD(JtlOTVnk(58VWo=B4MPKqYlXZAUe_1+l zEZ(*>aQ(>}H0w7D|3$iH-wcO~f}wHBYwi%JYC!h+zTn@|ctLEU6)H8JDU`EChTUq3 zP$6|GFerx%x2YqnrSZ0heJ3fS27_erBO2~m(?}=NOOHY%p!wA0nEm_V@`y z$s8IgGgX;Tp>^i2v<165&4H4!DV$*n$94-}6{#cIs;)CX> zolOa2@#>3@?eS{ocVz5y)y2o&&sESkkDX9md<>mXIlm-h`&1VnYxk*~Uy-qms*8_- zjVk9C#Mq{yPw8qNP>t*#{ib$D`s}uZAnbT zv00D~^C^kq*24*K&|Wm04FJnsZZ{+E2CXq?3i;)MO>}~EU4-8OqSw=f$s@B|-3obyujFGw8j}*8%P;=Apz&gdpeH84-ZI20Q#wU6 zDSU1$MM$)_WCJNlAA2Fpt4Ay|@lz7K z%gpYC%gl8m*90Sb03?lG;9&bP?m)p%2JLqb46``nDll1rofG@C55ZcPsevW*hr*tr zkRav=ir^eBqA^4{vTFHF#wKn}cm4Gfv_(bm!I;sf<*jnwkgDvp_b3po~VQL-`Q_FlwV`m5cqEo2@oqt;xPt z%^KX-sQrx-XRiZZ)3ch?)DYVoZd9}<{7Z!zif*zp(W=!nUO_0609+UbB59Et^!dOL zUnX1`3FUgR2rP2B%KZK!pS#@c@cS!#et&VX-|xf7oadKrv-;F`93uRa+&hPe9^&Ix zG@BSD`YwI7iD}(K3{!Zzlxv&= zuVh|LjtqsO98Xc4ZSN$rRbW7bm3g2W&pg4GiWJT^8d!JG~Qzl zJkQN#*S~^)SSKY$cJude{PpQ%L0~VvQw$IGKgQ+E$7n1JV4Vs9$Q~Jm6?+Evlh!xH z9J?bYLCD1fG|(CB361t~UXCx*>1xK+>lUfu188z{nV+S1TECF#aoGlgc{Xl!g7Vs= zRO11i+T1j&V%{DFJ0lOj8&5>@1_9xqAIjK7_QX69jV|V>K?2>AiySpkXN(**FxQlM zrhr>*pe>13lR=9YPm7ey>5?j@Pvf@^I&Zv;-R3UCZ# zXvE+dMfM^rjve{@0hy#;IK4wCLM04!6o-@533D<3a2Gp;F8X9@5#V3Y!Q)O1*-x2W zus$tE)eSKuxP=7~_6`r)5=#Xl5AbyGonupl2#T2Y9BL9I6%pPb%fYXTFdGK!E;`6| z2pDLbf*+rkNnpGR&@CKNudY<>z&7*5B5biVJt@puFi%~@JN<;%O?B`z#ZG)6_Hyke zZxz-Jm}7~~UQEts0RRLm=s|Y+@vawA{zPH-`ASkJi#`yp+e+J$5dSE`l@TboJ6(po zU2HVI5RcmsUmc%)x%h+OOF4Z&&Zm$6yj9L5@XyNe-}8C7r$eq6tQfhb3WjG`l4JaZ z_123Sxbq}oA-hJ#>|=kZ73Nh}SA#LskAVcpLbP5W8_%?Mf&+OG1`B2naDu@=CXGzza28tEgMz(NI+!dN=?_S)gGD-s3qN~9&$L9doBAbwfM z#2;DVL>Vv&^vg_oU$PnnK#x$iLi+9O=~vqRP8ygei8*i+#M)OTV?J)zz(~~@2+}m! zS`)n7@ky1kSv?UtW|3ia22W0HppU*KXDXNbG1e-RvEg=23b8G3keJsnJ8S-(JaP@^ z&)o6tv7fTmqP@S9(%q~vma_kG;}eFLk6|}EjM*$0@$(1D^ojyd_0N7IwiLQTBTB0w zmBZz7TWa>Lsl;sbC9#{ccch>tUXsR-bKw78Vt7paH~og?cFB^sfY*&n^z{^y|N0|I zChn?u(qsKOg*XgdDtbP}a6p~|qft>a;;B%YcG5?%Rr~4G#p%R&NG}PQmG7IMNfm|K zl*H%c0ays7w#WsVZ=NofM0$*lOlw}Im~R@Of5^G0S{)RVO|$+3XG?PY=rL|#rf8hV zl&vu+=`OXgU;fTznJFT3FS??ygK5M$;e1j}U(ED$Wui&h_=k{FhLj*2WrV39?(+Zy zVKKU0Q;ISR+#@0sTy&=Zy-C$VDegBzP>QX@jSo-9SkAqZozs1h`Q92v@r5SRX&8(U z7bt+g7VGVR5N}u|fvsUm7QHK8kDFtl6DF8Xr{n?39d7Hx8}|kxQN~&j0GqP46j?f|0` z%l}MA7QU!EU&zFgr4XQ3)CxRYYc|Sr>S=UvS|;vlScYNtW6S4nN~*brY})HzN8YZf z0tgj@O&z`^_?*~!XO3X}ILmamVSq}%Wz?oq20?|k;RY4mJcX;YuYHXG00#nStZx?o z8N{yx9}^}Qn7AZ6^iN-ytmnD?SE&VabyE`;N=Y)9I#N&wg{`%ykb5MVKA6JjvPMjf zZS6$3tpgeWZ$jW|?vS8|tR3~+;=t<);3}P5yMxM7ur>Q{!e}=72EFhY&fBy{YCWsc~Amr^{@A45t&A;hs(`98}Po_N@u!WG|$+v2}8(I&Oq*})l zzDq}*Ke1aHWYAfok8<3m1SUq`ON7wxW219T5>jD6DS*4uG?#IBZ-AB}Na5;e*0KB6;Sz%Lua-Q18_vr^xT zd@S5aG>Q0?H1$F+fgxj1mVLjF>Fu<6V{JVMlSA2U{Dr_N|BX?5JHsmjre=vSlPj@l zzDb8=R^>QN?YgKt~v*Q~&7 z9gd_9vCvSX(oJ@+Av(*2%YbcxJ!%=kaMq+(i8LTYDGzlrKWmKc)dY2+sh|ABkXc4= zo@T9nCQLh>$OK%FHy(wfD8s1cESCtiKn@}KcswTO8^q)^4v=C0@uS06fem?^t2I;V zd^}a+D&*d(R2LVlS)w^-R2qy*IQ!87Oy#G<)eG|TvHOT)O;sJ=4Y+HAk^z9 zXe|!9T0@0xK^MnnCe<8%;>b6&O>XT@I{RDWMrBTO6yoJ^lbm3q$y%8cVA>>w=oPw2 zqrFHtr_RY*j1Z_$(&rN-v!WC@(h3$L!N1boQ@tdMh7=Oy!sls9Gq46hrEq~42@P0F zFY+k$A_*FGcQ!SpzNb%ULN$RtTdxQSs|D_I6$sNFI$WqxH4h@d6&6r%ah8RJmcBp- zo=_QHe*_$7I;z!of}85?>{RNsG5n&gM>#BtqS+LdllD7JI7GmU6r2M5L2 zT}eoTR4a+)SfmY$Q@r}7xU z_MT?AYpBGPHTD_vKOe21F=TPB-%SgEIza>T8{ zd>7;Q=%BT3k`^GHqbC?d{gRGoXdmE?>?|!1<>+eq&g5XK18-$Z6Yf;tMw8h39Ejh$ zbs7bP`eEu8=y4s>31vC+s2&B-?snu#Q}&5%sR|qfkPW3ZTp>>er3FZFM9K!6&mfq0 z`4^~7AspaAWelT4fTLm9&_ZosAqj9c%9?Tn&Wbs4XJADPZ;Od@)R8gSz2%zJ);b$+WOUOdjOVC0bbLY9?aQ3%Im8`+HB^aqLHW zx14m^Ka;vOWaLdZNA^EMzhJ3d%iO1XM>`&M(#m`bRq}okqDShGm9`)?4+M3cz?0u-5);RLbIg5oxTUStJ8D~U9HFz7Md@jIzu)l zAX#W`n?YztT8iz@2%Pl~3_lG2c^|VyGU%B-*%T^S7YAn=m|dze;;EaR_0GU**`~*; zhqvYt(au{@o|R*tnaun~Doi7EGVlGN?ipr}x=KuNG)~UM^&b!vB3x+rzzE}gk@N+g zhWAuj8Nno@BO=8Plj_`)rIOY`#s!-`-HfUqY?bsmDnmC#+70Mu6rRnVZqo;_!}Xd& ztW@AmvHb2*65_YB`sFzbRU*HGW>f7$uRe;p<^G9xOhVdC|R#@uQ=UbNjSni8dJGm$`h0_;tlH zWCPGO`Me|^rHL=(B*$=CPXDPmb31>Swa-b+uC!pdOD*MhX_eXZ6@|pxP6sRm6(S&A z&V1z0A@(lf%`rcaNbg88O+)ZuWA^O^Owz5d@mvT3m6}(03oi^x4;V z`idSvynJzO0McWg^;&-KYrLqS9Gi*>?gPb&Jm|Zvd}8(4J^QU+iREhopH&5HvrK6v)z<3}yz_i=Yt{Z4@nZHe9uOnq=bImCW1L09)&%0j0048_^pTX~1ChJj z=X9iSDZD);2zTjx(%;_?Ww>(9xK~Lm?ZWfAm$+)^6%A4u$EjhQUd>P z-;C;t>CSU1V)Yh~=}e(Chgc0A1~Q#{qTald_da*|S)FOp{;lemfQc*ec7T@(3A0{b zyEZ9zamoZ|5Jl%A7jz|PCavj^)&w>6|1>h+WFIrqwkcxGg}E?G(?0*F&!CJw)$1qBgJklB$Vhxv6W{`;gP&-|=l zTmb&Gd0hz~F}=^!FC~XdrH2o{a@=VDk6HP;^I^gB0MS*~yeOqR&Cu=-OfBBz{>Wt8 zXGXwIo}t!8*~fGvr)~}Z)&)$-akDVHZ#>CtctyrxK_4V|<^wEC%p-H9}>1SyMMSc}xTK_v-5V}}*vDq%SOUxb2 zj)N1uOZIHrd_TiBmgcqw29^v2`j()iv&30cU>@!QynJr3F2i%egQfkW9n5mi5zp}2 z)0cCBWU|0hIFrevu|%scTNz?dNb{KM>-Y3oo*UlIy~*<*rP1G$>*JYYT{#egL%HH| z6&9?S!fh$w+(zrPbb3b`r%C5RwWG78thZn^34ZGCL;xX)u=x~EdWX7eM|X6++Vw4$ zZQ2t)eLY!_J=bi$nkPlv)qq?&c!oREP#P0JB0<3n92jqXQ+1t(B9Q>#)qd`scnK!0 ztFyU5P7(>wW3{Qt+08s#uE6xLnc9?U4_?~KsWsG^PK%TZF-D9|;YctQQ6=;-rO8C! zR3hzwniHsr=KYTd56BW$WXeLQ5WJbGFYiRCR-~=&pT;Gcl+5w;5Fi-VInb9DXR!T( zqO~*Zq0eMVLhYPGDMA}T@B_vO6pUyuvrGgxtP~t#00bk!OnEYY_B5SBnGM>DPDt*8 zLV6kXW|7t0AQk0@>E&)FlIO;uWvx({#iZr-q)&rYVc z!<4tsjr`cJ%>R6x`<1c%+V%jyNJcFx?j6+3cy6AZgzS4&WuXZ&l&a`*+3-GX4szxC z`Z&zFh4he;EGY;}E&E_vxt7Cl##^9EyX}Q`!`1SE!QOpSHYW1Sq z5n4#vouCbN_J_hAScoLOVLH7Sornmx+mC>_iwP=xCr`S~s}fT}E3o`R7l#jNwBvw< zsq_jpi5f2ObgK?CKIy4pmoAq~O?{60B7sbGzDUT_@N3crrjU7oeG9j0=N_VH?T>WQ zMKbgl7j{QDdOKIFLdws2BFh_b7qCgC8ZqwuF z4_}^0QR&R1*K4#`X~SE}5gn>j#ayNsa{?|riivGlGjoTuRoK71(3!483j@o}K|zF< z7cl}#R|h!>XzN>bN^ouUGI%k)mxxe~IRre!hv zDZQ72Ep$ak4?Q?pnocv8Whwa^62w~xJ0%k{OvK5I2Lz7nHr$_N+*1{{1svHn9qBRj z@`#Laeln4<8nbm}UyDl)A0j!lbP>xr^Q3QjCz2rlc~80@p@)TN-rk2 z=m3O=CuHx@1`eYb+-+|MRX^aqTZ^os8=1`E_k|z>Q&sVA{6{{4ZkfMJFjay zuQT3?Pt1?%=qbDT78SfF0c-psb{lD^G4gmu_jyF|jeKu^O9go^-H=$nFfl!Vq;m*O zTZ@aB=iOU%3}^UA#lM)&eXMe=9=>n}#_Mx29T(FjZHUHne1y`Kv6dS>Y3e;Je~(g^ z=E+b?)2vx)@3E3o8QJRW-|sweAD5wCN;0gs=#pMHRcG7PaEBno9NFC{xvhCuQFDv# ziClH&IMsPYth2K%!rh@On_MtvSKICC^rm3PiZBYk(TMj$j9azH^w6S~giTOz zs175NGqA@QB?xHh8xpX^0y;36uCWq_f0)RI`rDbg+HgTP0)?q+5sVAh2?AtjVU$c- znG~Tdh$$B=V9MLwYpW&= zw6-7-B#%2^AcH>9@Eec9wzP%%j7+lkMmUI?`x!N#kw_=x^H;Z+-u_^45;HBZ(Z0yZ ztQV@O>w(ZlxDwtl3_;bc=F5e1@_jj z;taQ_yWQ{t5rrTKJ55iP;rr586BWQ0_0bL4vbjyQOe5*7 z|6roKp)nmLAXgEzqz;!s-EV($BJDH~p;i1_Pge+foG2h!=khmN5W*9_7|hGf)z?DC zh(e2)GKUc)R3Dw5@3b;acGJ^jm9+VVbwpAY9hsPeBODTado6Zb zS6^-ymcM<Pk}MQcYX0A;fp# z8dwW^!%!4J!uBKzbz8wYY`Or-bu$Qrt+dj|bh_tDrO8a<+WAsac3~Zn&549)VaJ^H zM47s4BN_N4zbrV(^Pxg00R0#8yEU((4oqK9p_Qp>upphe^q@jw3(y_yYRKPBk2%WY;Yzw(79F4?5;S68;s zD;JyR%MC$I_MLEC^RFgKf@Wi)k6SF)o2$D}lxyHhc^Wa*AC$;!RS+IW-e+ z_NigZZLaqFZKhSkJhXHsUEE;(A>TGF3CR)&(l%Vz1v5tnPwM#x%?flq!`@k`*|sHo zdnUElr>11$NzIXPTYDG-+s<;><5Dc{VnP4&x(x3I%nfN0I1C=q zB}fM@Kddm{NBMhcS(}Ak!+G~wXY=MGT6PO8+a8`fcgk-#<+rj{L)4U{vA0;(-%dZw zq&kXWEf2)dLi!HX=A*%wKi~JbwO)nZwZiGiD*`uaqLRPa!oF`Fe5 zsbpq=-tMQj$;BW{$d01SJ=S027r!nA%kfRArlIUDsl?pECpFL>C%sT)cs*I~??kOS z6d2~1erHrkJ7(}kCAu`kDv5UTwq;b8Beiwyu|A}9u9!&b*evHR;BTcq|E)w(?``r@ zWg3ZXxDqW0$O*ccO?vRCL52wo7c-9yTrIN9w}I^H-;NG8Q;QNIZfo5T_<$RZzI=JK ze`v!!xTJfs3KZo4l+!2kx8FMnFS?tsb!!ih%w}{?Fp26Dy#4ff?S{Juh~xcT^rm@= zibyDO^K2Oisk;eV?cYt(YoSQ;b_Ng+ZT+Xi`kiCL2vOAb_x96hy=-NmduQ?l(gbwZzGB_udk8*P)7^u=qEQf{(76M;5tP${R;6sV|d5L+YeA z)brc&=~>y%GC5O7o2rS!QzdUS$xpmJT9s{*^OtD5C(F&FTKgKUd9{}6Wc<(k@XzKq zv^wE{Yo6gr89i3zpDm}k@$SWQ&L)#0>E)jF)yx)}3LR)QO--7!K|ZufZrqQ?Ud@>n z;x+6apig89=!yzrBx%%`p2l1YH|yj%+~cX-7M3z{Zm-Of95LV5qHZ0`EwGQBZLyzi zQQKMvZ3VPFVyH6y+(4S>!8TLs*cUBNebIs`Z4Yllc&w=wu|O-3ZwS$J8_o$FXhZ|i zhq7s2S{};FSbB*l8U$_gpPOlJ(V9W#xEgoL=pZ*>p_9^hbyzEMX@w^Ga_2Om8CiI- zAnNHt7{{dp{d9UX^6O^026Pnu%wptgEF>VUP>-j9)0I=T5NDyc!+%}O z9OHXE{GKA|yU2rN?xYc)Y|rm>q4MzW_Uq$DK3#Mlzu7`o zaDEG2F|``nW3h+LJSOQ5`#jhZ_i(26VglCa0tYVL^zy1{SWuP}Ov_~^p+?K9h?7;R z8z5wA2xpOcR*zPI-n7GgnBn8Hq_2~pNlp9N!3OTkRMISsh+meiNME}EJ{e&q)ZEOZ z)$qh1K7CratE7EeF8*A*0EV(0m)*5U$PBi}vZU+tFzF44(11h2R$D*Ky|nm8EbrPF zFM7Aquw-b64XLN2SyUjMN($LZw57a4cE5^oqYC!sd})Rr?GQ1NOQWxz&QK;ML*LA_ zkOaUTdb9cq_7_7^SxF?0j%L&KO(Xkdn7PrBGOj2?($nvGU;>GCt)sy#4V&kl z-HBN?btMcvt|ocLX4zN267$)-v7_pXk5`YXF^@jMrikI^`NbZ<&oZ2|GqS&8H4r;* zJ&LIX3a54_F>Yr`gOY_TEriAmL`u+Y{e5N-c;JwLW?3}APo6?Qn1Oo9jCmZ$E&n<%kk?exGAzb0ob1 zS~mp)^$QF0Vm*-BghbjWk`Fm+C7n=4C2np(8J6;>hM?nqA_j9nmvr*!j{T1#Wb@1u z^r3Xr5F_pMKH12B^lf75Zfk@KR8WVEUQ&}Hi0O(#nT_tBcbw470t(S%kS55^Hgeap z(neWpSg^PABp(OJt}WGu@WiGUN)K{RP)^rc2_#n6$!oJz%mHe+5`00_~RkE(Lp1%#Hq_y=L>bS0;dI$9XJK0DFJKB*y)h+*8P{D z=gKS}y~@;t!Gtcj0;Y%QiqilAoN zu_b*RrzY(&t&>3&M(?STM8a8S+-^{yR^V!i@jv9Fu0@Ad0xWZ|iZ;t-$Y!GMRuZh4 z9(OFg(IxTO=O-}CW=@dP78j1#{Ae8Rf~pOmx3z{NecS_b^fV(~+_KvFa|kXj&R@&C zYQVnQQndGlEZp%EDR**vy?b&4>; zH~Ba7kNgcCP)WMdpSoCqWP=b_7q4gvM#7^5-Y#`BPLpn~u~IE$`Wu=!&my{BYj~i| zJTpTtHWx)SRPzQBLu()xUFM!#+^RFI7l&vpxa{lV4lL|_PFTh*U@)bDAz7cUYeUtobTjjd-}+^ z6%NA+2bzW}9554l3Pft^W7n`x*Q6_WV+Mj^4PI_VPPQ$DRb%=&`XBR&WGO0F^r}pL zLo)rwLEq3~IJ`_t4{PZ>JI$W#mBTJtvzTI1R_skcQtG(fq0|Lh}7(m*a*y#3O$ zdBi$fM|Vx}^l0h%EV95WJoUQc1p~HJ_F4k-ZxXbApWru3EtJ~t=t{j$o0&o z`l_XxIwoXUI4Uvzea(PO7j{Y`)(@3z*X186c~u_M-}b_yz*XGeO!OO*bRUyvP?VYW z)a*o+gg?=Z`?dKS-nqnCGvKr??jeHmCC(@^Bp=G91>Dl5*N`gSSBE<0i7oVFFcRtQ z@yw;9*L&7pW~NO}e4DSRt&E$n2Yl50LU(RQG+;-r7OIfhW_&{`n?m~e^{w2=#O%Q- znPzyC^NNr}Ts9cU^IpTP6S88akC<9>r;<9h8Rq9hm~GZ7!EX}(;IvBh5=ai07{niN zOUX^$=$vh={&lR#$rU-RrHGu)KAFn@vD*4HZ~BZZU_N4Uy*Ek!fi}X-a5qFw+$~mn zFKRTbU|QjnC9@UIO;a>w=YGtVRRvbiCNFah`uF@p10^qKDARUsCraPB>xirlRLjq& z)5{lOUd9Z#Co{^)Oww8tL%fXl+!jKr=(^<0?vSY8hH;~B&~RuG*l|kB)@>xUa&2oy z{j3R|wv6;5EK?B(u;#XjHNJwekE-!l{86{NtA^|4|Q`Ym1E$Jwz3J(qzE&6f~88KCoIMs2 zGlJ&*z0Q2iRGO*f?_!mvk>3rQSlWNP*$7_DZQJHxiYlyQ@ zqNnFl%v-g@k-en`wS}q6hpwtASw_qkYMA@oROgTBoPFv1RW)iD?-^r@Yjv|b<={yOHdh8>aN%}gyRmSoc5ggE6)I!%uWf_(-p@58K`xs?8XkQNU(m&%y=o(|b#PuC!^w(f;&yOqJEDPtm{ z>xiZe$;39uoq1{>2;MOO0`2!ql$h(-$z?B^v6Q^Rs+>N0r>}A(2Ch)irN;9aP7afW z;_&--i1e*OppK9Qwr0nqnvsc5tV~23(%wZMFP0(ciEhXwIb)xAqMvx^J#PB428PbG zWLm0$Sf&YqvbZgVVsu9u-=F~lwTE*h=*Ow=9fS1YOZapjUEN_g~IdxR~^0T`d)O3P32M(0H<~)>=3p`G1=QaG-3MNFl=&oq} zS`U5g=9f|4G7os)er54m4~BB?;9WdXGo!D(ZrrADzd4Z)$G`qIeM8mOKx*Ed$d)?a zpU6J1**Qr|nVo1_K<|Dro!NkPF?VR!nI*>CRcHUk{OfFs z;T;vd$v<@GAZ?lIEFg5FZ{!H)W!ny?YeCKdHpk*%XE1MNqwr^|$2~h!JtBw-vwV(Q zMb(SxZ5nz^>A7N(=AVY!tu;TZhRX~YQ_sx9dgF1cNn8E9$!5OXIy7&p{Yw?#Uds9G zPQ!mxn&?O}?oRis_UmK`Ygh7-eC|b6$(IaD4$sQwzo0T1&v>mz zRm>en4swsH7><5lv#6Yz>&%g|DW|4V-69 zRv|l@k2nd)>TKh6h z@}Itu@8g&+&)!UgGqw>!E>Y|idU zBbRa$5^R@vi@ArCfvd+o$_PAa;YR}Ltr2AWA#)S6_BU>)4|p-%J3h=k90F%JfU0(D zB&ak|SDGI7UR>ig)+yDXo6|;DhBHMue-xc{)RdeF88ihQ!%k+Yv33o;%gC((k~9-q zHb|$1aSGPi8c}>%OEVf2aP7osTBBcR03Qd*egUx75)Enk(XN@1+O=Q*O&GcuT8m8_-_nDa`4P^TPK!@@Q zxy#Df>6!z?%V~qqEY=4w=54%-9@eDH68o>g$U?fiN0ASsB)#`eMv27=J=-D?2I-T@ zlI|YvYoEF;za1h2{YD~;UCYR9Ek5Rq-Yt~-8s|4~vK!e0#qRB&5*ZZs+`1aP)u(!> zcVpqDJ0cPHw`iRq$#*}uJB8WJb{1HhunNh zvZl!Vl41B&zu0(=kF*nRhQ%dfZJ9kOS%tW(#+dFoHCVMk$|9mIA>%ejO z@&mNMNJosKn>)&(#nSgzj(ue=GEhenq)YhYtohjlzLLHCBMrcbCYV{`t&MWiWe2Y@ z&e>1<PnUuVqnXS6+rhx_E78|0o=1GneV%%%1iU?6CA1HF9bJ^CVh zUZK69VEwFz?v$rPj)8>4cEsDe_cCxtzH)`AVz`)Y?`;_U672JQCqR2H=57)>uQILK zS`%IS7)ffJJ;FUH4>Tqvo8B6_7HPd?mZ=UIZ=*3^(0G;1aI4%0O(-qa)Aiy`ABuxKPi&!#~7n;jHMvs+Ia%vxwUr&*qx0E^)i`?pDT8RpIV#sdUhz(Ex|=Vjd@ zk|Ff-1Fcw&4bmGn@}^2iXE)9zog-(;wkIodn4}VhDOtOp)RonfXd2^wm_eS-5TfJP zv6{UY;b3TR7~fVTncO3cJ%iNmC3~+RnFu3}AQ{fv9q2eC;o6fmTS+C8fRnTx#myh{$)y9{KV4hC7u;Z=7i|9)*Xo?i@Rqs zogZ&_Nx6I}1dR79B#FaOA;>2VF!@4`M>Y^f`d24eR%p z%u_xwRj2m!cLxU8)zyOZqgM+g~b3gRLrT*oin?kvb>!6m+@N# z?bN`K(Mu0GAQld9dh~!pbMW|PjgoFmRshxIfbLU*ldEaj*|&y;)(qX+yiUWO2IiYr z6-s(GSJv#z0-GlK+EL^Z)W04 zl8IxXaX{`pTJ|ZcES2gHO_$q$WYR+e-MItHd)k9RW~X$fy;H8a(()g6eRsN?F(0qW z{**OvI&UL$n5;UzXKZ7`cN-h%gLliYW4ZfU=_1vxU(>=F#d+G?rQtl>NZb!y$I^7I zbmg`;)Z>h6I?Ybo)VO@GgCBjW4gEtikl);5O5et^3%v z6Y#{8{mgir?3hQ(d9+&OxWcncZyJ(R1oq5yKR;13nfU~8_I%t9FMmYp{Ad!h=*;HF zd}QS9n(~QEV&J}X@JD9%!$Vky`bjQ}9=>Dh2{T=~eRH`^BBmd*@I=z{4pLFGWG0!V zv5cu1`g9!`WP(EhU0Y_H`_oH7v9)*NnE%wNqsP=L+#j*7d^sP(;0@i)Ks_x}5Sri0M5r)hG|bDsOY zuj_mLoA}hTN!h{&;5zCJ(Udh9O!XQ&`FbxMTb&6t54@ih;r3E!c;sE2#ZP#hwIqB34S_%Plz(7-aF%(O2h$5<>;c?PXV>DZac{JxW5MSLy9JQu3O87JVwmRLMNg zd%86^59-4Mj~C$$X73ig#lm5G#XwDU;2k262I2iwGE8SC;A%0n_ayh+j zpKx9J&;m;3g4}%H8G7*OB>DW>ztHl7^kHf+OgG#|X)+(3x@zn78O23&(BrT)DuPAh z8E$;kX1KGqK{+l~LVtWBGx-DNZSmm2ti>BND)Eq>Dbc^H8`tf`M^v{{)*7O#k23!W z-7s`cSCKrKcp3jAe$~@Eg!RQFFT6xP*YZ@NX=59!T2w~(PmQ{=9n%KQwO8rRTgKZm z`L;~w{fU|LQLkW8gV&$8&Y+Enfh3Gs+7y^D8fS}D)?A}Mi+BrE;&fu_Btd_XO3_$V zW8{BL6AEUj>ah*zrFR#}wJbdC)OmvsZ~=@}ItH<0MLR4=E2|a3Wx-tP>S4n{al_Lu zw0^~S3lg&nKt4nSxp!>}_hmt%9-rw9_KD$v{w_=#TM80SzO6qi)MS8B2{{R+|@3QArG}Pci%+R~Q ztZ$^p1&iIFm|b1jNdNc}U-ulnkZpREKJJ2gNOoln|9q#s#QG{n=6IQ>Ik%p-F?Rzr z!dIKsN2#rh9k311DW$UEJlbD6D`xBKv?2FoSzcEUI3hhZI@_jy^Exm2X<*^CcZ^k{4+=LgP)sS#o<=A6D*)s>$e;) z=38yWraAt0vG}RUG*cAr+V8x7@;fU{=C)rsF(`tOu}2qh{9^t#wenN>mCV?w7lT3m zjCg)?&^?+0uhD(eWtB}?Pp(_6^=}~FRF1f*?sY&GJe8xVoD5k%?~THB7HZLP z*z$`p_!*rajGF40Pb`lqQ4|He^W(Fo>IywoRMRZjHo>M?ZE~??p+Wj<1eLCNIg68) zzi4u|M~&st6~Xpy7?&jrS{vPOOv^5-EYC6K8R>LZ8dzopZ zm6=Ar*i6ekR5=gWGGiVri7I1R-?Bmai6DHG!qg~cL%u<`C*i&}qYc3}0n?&y2@4-0 z-nVV|6GD+3YG?E1i`j8|*E}R#b=3|Nia{bgD0GC)M=(4iXW5Vf8au z@f;&E`rxQ(BW)`}k6#aOc}>pMENFA|(IjI_U(YfqOz@A}duh!GKF8_dbRdHcx1wEL3L}81+?qEw(KU5Jiz#!%`(XM73KtzSoxk1a1adSF?Ad~c{mjFW z^+};qBklBqUdCq0Z!11{2jb!#PX1-90@fpssNsa&36@c=;uuG7j@Xtn9rT+VZ1lFi z;4K~?hDpW5m=$mS3zwBve9}A7DwWmu@cMhu5IQ-JEB?q!SI#q{z3K6TSm3bi%xn8M zH3`D?xY z{7qShWp>fOe^T2#wq|=ph|rcSWU~wky=O{h68Ue=PNG=c@3QmDhmb7cs6eEgtTLwV zm35TxZ)KL0l7>D4cF$dS(Ah_Vr-tTg7&CG(#yOkF*%U2v8f`lc$O0X-<0eHpd<{qc zR$j|uAx}2(J@NV+n;@Urb}XMXjZgZ^kx75i;vB-yMi0p zY+b=UE+Up=?3SUUvrT)gYdHN43N6lO8#>4QqQih$9$k}MYjJa-KQkrcmL%SiRMo;# zH(PZz3yJK#(Z}L8T+3DEFwSip=w4_V0x4Xu_V)NM@8b1$@#E)BwdYNg=*17G$Q5Ao zUpG>4mo%KpVw55@@oK({7&<8isZ1~7kPbp_03 zg2GTCXZST#)1%(N?@%pG3dmx?`?jXzPvQ1Z#eL}(S@QVtd7a1SVS*7|C^H85o6Qw3 zb{>9yd~f;>d(##5-F*Z6cWL@Syy4|2BAaq(;3{j#M~~HyPOB(iOH(w4?Q2OF|J+tU z{r)CQP0)W4kk8b=l5|pqTgAUQ`kg26^@R=T;Z$COblu>=ob&#V?(JeByF~5cKX*{cgDtftg?LOx* z!mlmTxllp`N5@}`kGvSq+*X^!LAHBcu2`2|6m{MikD;CxsB5=t?IZk+D@OZFt>@y; z^QI``bK#~v=h2wv>aSuKZQ{*G;#IAKQEX@an1+hAae+1zjzBXvz|X%1>#;jt5s5(F znXpS0LU2vjnWx4(c%g$2Hf&Ah708%$)4dy5r_)cLjalwghrfw{9|*Kh5j4BHXKkUO?es7lfhkZ*wbd9I8G@X@pLU}`is+iB9WHmp94a%8Ru-6Ju zeid7n{5AMD>SNW8ktv89d2rJlHZVHWJcThIjT#oSJNFeqpm9B-j+npF*VL5>>V1ai zGRF58;Ul`g2yV1G3`e`^t7mwOi26@1AniPtp#STb>4@n$^PhAo4^UZoqsbZx0}CeW zd)yEl9V^`aY8foYnn z8pT?1;fAN=wQk1wgj@m_LgVbscdac>sFJX{djCFp-7~ZznQpC51E&OJ zz^Xp_VJr=v|Jj3R|I$Ye5SzK$PcHF2XDbfZGydA!>(lorucvi763#=|C z*>eBu_*4*HcXY%Cz&~^3wMWNyZ;R{u9 zB!u?QKsbiO8#{w&g+t#1hhoGCMoCWx7$^|A1ML+az`#gi963&vHZ9&JDNlkifs(6KH3HJp%iOIhD=RhUs)bp2 zZjeeCp?(Oj3P+Fva0KiE@PUkYGR!IxlkYce@ZmKMA@K!f=m|D0(|;V+xQ-x{b9e_D(3Aeg=&cF&F` zvtQzo=bC@V)#6o`aJ430(p` zwoUR^r~QtlZE$Q=7D6W(9jNOpZO#uQr&I0?(L}Vr)4r9D!8nMg!mJ~)@}*3CmS1Xw z!))wR8fv~wlR$g`H+E#bxGt9{CV;fNOkbzlyp5RL#A1WYyz&s57*jGUaaV@J0&@~( zmmDFJG1KmxEt#OabPvR;@!aJnxWr7nG{1hkICbGvF2zPpOg^mAb{s_nsV=g zUK}Vk0Yx9`L-uwa_{BiL+Vjf0(BnNbKT$qQib)G(DkONiJfF+-2 zBS8(Zk#Gv&+vmc)H*9Pyxt6HS>#ifov(9ZHZg>GO@xrkqSPNDZD;dtP?i!*jJU4>v z9o%=OcpzcmML^17cd&g`Xhm!pJXe?1LQsGf-$dHn1bzu!1G{$N+M^64DI?R*wS`?=k(&L`e$WXxd_I(zrob}dfeex$FT znp)itAbxuk0NkOaonm_g<#lINd4|~bVPet0W2%H(E>F?vPm%g%{yZO26Fj&WfQ-v8 z0Ak&47F}}XEH=XSJpHpdFXUIRXsX^dea~o9;(PmTQ0hs>u~Fzn2CgnCazo+I|{;l|1t*p$%||A1;S7 z!tiPW3)d9g$1&O(PBVe%Z?7;bkb+aI@PUx$lgNv1=X7jd>?60Q7m{XuoYGXlqzKn4 zRGd-9woM?DU--#__q?}y??$& z{x@{@#!r!|Q)GL)yUpck^LTNRO5s4hHm?mm&o-MUKj`$@gCT^;JI&0YTBkB+4P%Hr zM_Amr47V!k&Gk+88mFrf3m4WfN4`hWEe0-1tlxP+7zYDM8bGk2{buxfB>YvgOfLyn z?V^@dRJtd86#ZX}xUiBo{T{sm-tF-3Az8>i&kK>jxi{YD7LNU7_&ZpV+3h*T7Na~^ z)KEm+fPqL9fS`n*jfEl8HpweKQ~M8M;|KO9i&iV|QTNpL4Gu{jV_Rvk=Y^vkkhSA# zo7>m7;)SDO`G)lJWH%|5FPC)=Hk?*@O;mr0OX~*e$R=&w3piD1CGLlOJ*R^Hc_66F zI9SPzHF`(YOgViu(fo~?b>^D4OhW3$ak}aI#L7{+nL`Pamr$I|^F1nDk>Vj9LmlyT zmG=QP@N1z%A z6Y#{Zd4DG1q4Hb63-_v>@A9)?4|byuj1jN|w|6^RAI>Pm21|;pz#i^$WCfj^d3P4PM80gNY?1^<$djP}^>CXClkoCq{-Iz5STm%Z=^ZiteP z81bNkdo+{b#lugpI(-dsW6u!o3iad~%U~>zn&xe~p84NLWev~=>a*k+ADJpPZGfK; zx*cXZo;6pAXWh~G!Wdd^p;fkqVDAg}MkTtq9ZCive?U{SW&*2(gg(y*Q^EcrE6xF(@uKo2wB0*w^BXse}9r_TY%2x3j_*TLC0 zmdv-+mgdkI93qP?Dg~A6co!R0D`a_Y!wq`>k%Wos8JAMc5(zRf6h=7#cNRvhGgAvY z;4T3n1oRk~3&Xu(dY1wHMEz4N89uz3-k^{TM&!Sa?fI$#yDp>DY?q1_DXr94RvUm7 zRdU!!3`Ge}JZHEcIqbNDz%0@@2mDt65{=lJgOqKoY=Y7gP(A*>bUNRVi7%4|qS(sv zm#7bqM+QNrMNtIVa!hF@M$;uGgu5rscOYw#JAVZR4F>!F;g`tkNKARHq&W-qU>wgN zE0Q^@!EXjY6-}I4Ey?FFX@^D+oCH)wMi5?*gqoXOh^-@c7xsdZrgbG4QQ&v;4Ueo<1YO?P1_#t~S+bLhTY7jsK$o zrv6Bu@PMAX$f5wvIii)|@Pl35^5guaBy9+K?iHan+p27gk|3v%R7ZPJYBkcehRcxt-Z@OGlCw3}Az$9^N7H~b}0 z)8B)atkbq3EhA4#cyieFNMR|dtjqH+1BLz6t*ahQB=+$~6aV`mKAKpFyNwU|I888f zqxdc~IxGu^?+ttn0piTbc;i9 z)&Ih!Autf92fo%D7!(%1YRYy_QpVE2vYhxkmR`M=uH;Sx=f{5gFt59fiAM<9aHdI6G1 zJ^=_n`CH5HQdCI7tbS>|)J`$9ZC=!RSHP;HGY=a_YRl6=4T2fp@eqHDxH{okqb zHDDb0e9TvLo>qQKc?StGH;Jj_C=4p*Uf_Z%ub0F2kioyD&GtwEJ1%7G6tt=vp`ms%K6$EJ=XU?Rx1K-j1|B$cHYQ*Kd6$!@3iFI`0Ra}tCj zmXRWz8p66kcH~#uVL5qmXDxBu&SLS~65jDP=XuvT0MaS~@>@|>BJ#YssM%$4hy za^Re)()_?Zt60XGWR`$AKBy+WUaxEI_1u@1xU&|pq;PqQnA%XAP>~)-`Yoj?IRU>d zPFSAk3@7EREB=k#tL9%xFl6WABm^y3on&%5+9$@4so5-UN`}(hmy8t?OyvJNh?EE! z5-sC@CXv#d{`W~_Wu%k;(QZa1IZ~l!p-q+X^1I} zB)b6|83+aM9KSq`)HkIOX%<;H!60`1?>tiZktbUt%_1YQ;3Wl~Yc;(wheX0N&ORyn zCySS+FwXKrBxKqt#~iZkzr#oTfI-YnO8RI?&hY_TLwBenWJ9Ye)YArKh9BF!Xj^Px z!gwmpCJ~WRfbElLba}b;*0u!9Sc7ap|8fTFoi7UpGACw{E<0#N^2hnDBHlzPt11-Q zp>r<(ZnXCNzXiK(sfR_Xq|JWvJz5?=!AY<3uzHrQ$YZgWH#!DPl`J|~%xxAP#$2}e zu_>5L{`;}usJWHR3KU<-){dp&9Jf7`b8Ybo7F{8fevs<;U2wtK&Bb{Id3)k9{$RpTgoOVw{svd|27)k|JA}GtP-bzweb0#YNSabD9!D3%T>c zVSGcd&6^fKe}^=8^)g=4W4>M7acXH&JPU^-6F4od z9Dj_fs%P;vRk#FGOf&YKh~w{=9@)!PI2h;liK(Xx`Iv||A4&LfJEz~y9egBlY`yrx zl#?3$Le4XM?+7%>AH6O&nFAi&3(7lz1(vlBQ%jOcqE+VC>Q;g~@O&M|Z(dp&wN9hf;+k6}>(6B*Vf8OS=p3&;J$gKR%7- zc>`}B1l0H^H?%P$(<)|@z)T@eJ$_LwUQ}ZaiglLYi^Jyj{h{}%b%uv z^b;w!wXw#M7G?)Za&JQMJ5z{vY_9ctbv1%;mx@c#IglcTy&^jPo&w&?9XUV6be?{% z#(Oy-;Qy47B$fPWm*#i<{*IAX(~!W7wcU3mb7b$=XWMk@ky{(a)ol+GUE%Vb=F9}l z8n0M&-DUkg1fv{ltSqbiZTx1I$1pp>Cb!eKviPvwU&d=oaPR@+70%hbgm#Kb;#d)g zu(>>EHqD2;IaY)WRYXGVTt#H45bOQG1d@Lh;ifWeY*3KRpIs^aMQk|)t?K{(0slWo`0|ofz>_cQL$ztkQ$&Ee7w+Wg zB6aPn$+XSRFUd5=^ILzhiYup#|6={`$D6-cH2}l8n;i7*sVoF6t-n~2tCw{q@{85) zB=|b(FIL%slrJk%^%bePQgZEo(+J+lbUvF%KbWdh)5rL#W12Z$DQtNnk-*zl(BX9# zINkiBWL<%Q)sQmt5sM!~$JYeDAYN}wmMfUPw9C;1NErL0QQ<=I$iHDkrici&Qh0!d zo%E0yJo_>UZ$3Z|WYV|w^m9FFqA#Xo52EOiNcJVzxjm6=gu63{{RX~kzq1EZybAah zP-#s#!VtWKEqE!yJo`F^J98vma&Vacz#%DU=Vz8lu%M_EH^5h?*`o+3Xl@MPUk#xy z>T$5y&5hhvV~*c=S@HlOrxvlG&`{>8|4wYl@6&`V8Nvn*Ykv2n*zw^u_(ou5;3+Zz#WKMoAn0q&pK$0E~74(}>acbC_l=I3oky zJ;Tbw@RFHX5Rq(OIBz8Kb_L?MdWBK92HzkBG7Q^2yTqr7#BF%UE3EO5u)01IZHIYC z*Fqa6J<1#me24xj!=y(SURQHBG3G$r?qhODpE^XBd6~mmGm%_^TeVP2S9t&&>J2fQ zh4Y@AeHa&Cqe9yN5e(0HUdXy6Ej*K0A$ zMH{q-=Zn}F0A!!m(}yHv4YMyN!_@5_g!OX(s*nx{+4~0Mehs>P@VXxUI?Iw4T8Ui) zC_D17687Q#tX7=V4iz57m9S_&9cLLCi`)r*Y76m~7xF#Dq#|S?A4!|7>8n? z9PEtZ%*vl7ie*QLsV9y6t8U`=8@NulLV!-#NN88#NG%L4$11>5J`;Do)AT+s&Na#D z)vT344hWsIWYc2K zZj#vS2)a0ZuWA0Lrg#-<;*LSyVllO&=^Bp6WBPm8GcJ15OyInc_+($eOk?|l z!=|hG%?V7od1DT?z$-Fj?YSo($E7zIH1LXf=C3)%_V5T(MW;ylZ_g}7lOZ;8S|Sic z2QPj!Z%e^fB^HJr4eZd<)7E6S_Hk1C^{r#po#->dnk0l^bFKvC8T}JYy~>6LnFJI3mKDc4QU_ywMO~p9~TM=q^S2~ zVfUvi0aYTB^{fP6>k2YYCw~i`8jYVJKV@1eYi`^U#wCZ|b!-_;D@Capnj#B#3bRVm z(5nDexGNMo&vCZOm81~_47Laay5cTy4_bDSlbg-z zA}qk>!)}_VM2A*cM$EL{e)xOKs|6KTyNSbjwOfDJbnAMzN(%1?(Z}srbWXoE-3J{3 zaP>&Dw&z1i?KkfYEq0Hf--xg-ueNwguusxXayqmkFu)M+YnjJ)v`*Y4S z;%paRpF|fW1u}5Bf<)3`Hs&mPHtFTXR{qB%`X?^_FeQkN#pz}#++%DI@r>#lzwAuX zcP25nl8%!j^x=F!Q`haz2!kN<9lx!)^=d|dSM`?SI* zxzcnQSsz<8%x>H?Fq>dF5pf@?iMWq%`I+&T(>`d>|KGR|QTRV`AGbLW_t9f_4!HcU z6v|(4K3Zs+;!L!yt9@=CN^_V+M306^1O*o7fY8~2Q?!Nt zu`oN-0lz@h2<>${m&gWf)rh+6boJArT3J;eqbv)#`i!Nb@d{ubLg?T|ySiy^u4XXO zXRHz#ovI}a>LZQjXu}X(U^6dFVHF`;b%-f$>U5zhEFVNBM;M3b12ikx(YJ>Fsaw0I z3=A3Ert%eqk(ju~oxi34ljJp0ej+{Yb4%ERQeRmU+Fwpj#YpWlc`33$g#MT)iw+Al ziRUjGOM|_aGmJb-X+fD}e4uY62@fWyFtWoo_pL%`Uymm6yD8~o%A~A_STAj&FjPvG z)K!I~-S&w2WW0VVn4Cpqm|v%)-@~&J5t1Kf5JOrE8XNQm? zEdZ)IR~pWCA&To?c&7hiG2Q8>n-ZWlUYx{FO~h!= zRR%By_^E-#h(T7gqmfGgfgVwBU`VPuCeqz$k%ArU&-Fe>q{&zZ-?LcwEdh=z%<&c~ z(=KN@gkt)O+6}Xdto@78RxBBxl=Fr4pRCh1S%Du=s(S|hg5+BM39e)+c+c|pRI@Ra z>&MDMN=>Y+KImX8j3no4Jgn#IHtdPTO!#4m1oH zxaDB17zyRrvK_YCsYGd*6~dv_0IgZc-&BUiU<<1FpV{9P3)2eKu- zd{>1QY6Dm=^3CViv-g{O3o1^ad${TZrw>+?RC3_x6JK(SdG5`-J&-KF$7wJpK~G2r zR({1_tAx)A#vb!tDtc2>%wL(`(!(aFKCsH!!=Bt#k`n*UF`XNu?0&sZNt*bNW|#QM z*wYUrD%yP=1BVl_0jOiq&s4XVe@HLh7%x1|{^zL*|3ExxH{ZP&g9PiyUEG1nMa(Yi z1BGKfxGcBF`^9+6p?G0KgMX}0T~N{-|M*+pgOg#2=D){igD+UdKg#jf;|Ts!3Etxs zD#&H>^9($jCbqXbR<##&7T;IFzukdG<}}Xx5J&fUxr)qSuNZ6>2bbl=^#3X)d4IOa z7Dws6T1{+VCiKl`ziU}fG?p0tT1M|Jm4yf7XY?tFi~d%jZ)Z&nI;-j0z}%B5LLHFv zZk4&TvcgQyD8`?u&_7dQ?Pr;Nh79MT)mXs@3)!!kM2WH0LzpWJ9)Wtek%S|+xy8>} zrb<2L{E;n&g%Qhl8v%po_^JBrQ?lmGG6M0lBF%G9fC|IFzu z-VFrlQB2jxHxPYMJX4;0qkc|2tZPiVe-lw=F#3`;B%tMeiNU0j&s{B>Co`4KEec6k z?LpVep1;SCqJE3d$6CWZ1_%h7IA0V8mEPH@7MrzJh{e7iMh^ z=g50UFA9Sym08L>qZ`vuTJRU<A&SH{U%S^ZCnIDKxGfyNp4ut4A0o&JBefLmZuJOta?^*W zG1+HogcP1GRwJuA_Wjb3@lNo_#<$I3ip{*SXV=B^a2u6=*Nb%Ahd{x z3GU}V(MucahgCGJ)m$bi2LB0uWd;2roxW@^{AViNZJd&RYvWTB`tNv43xy`5gXQqut{*Fd-2!Ms7eDWg^LO1dF~ zf(yFcMU_(j!1|p`YpaLuu$N%*X5TNiKgno>|ZIj|H_x^mRch<9+6_VaUx)Zy6tAcMpl=VAzFwe3O+|b9$PfD~{sPtZ7=5l2bzWrw^Kizv-c&^TqMmIBy0}Gl3Hy;_gC5k97ebZ&DxA1ObWe?H3%W zC!aCB!t62yS%}F!W#tKEs5~JnTfcdMsey%yoQ|2w%d$S!-^a_Ag=lIji#Wod4=N*J z{mM@WJInh*{Hb zX~uO~hW$>Ta5$ZRIVt&BV+`68l;*L4%74{NGheuh{+fp6huMprm2vPSzZ`aHM$00q zXM_mRY|t1!WXpG%tVQoahtoDV*8Kpqn)C#cCVQwl4WR|-E`ni4k!6SUeLK(c< zcP8=6b@VlhswajVMB&B-5^d_294s;+L;}&n>D0vq*30cEg?CL$>jxEM0*vuu7a3qu znC-I85Y4cSo~DBq;Ry@rkf*|&-==>iY4p;)uX+}q9$r9?1uG!6OyQFf&CiOsHT-D_ zR$-yFSN|?@X!9t^$js|*_q$wPUqRma$E`o7Ud$}n%@h0AFPpcN%R_g^_h7vG?+(k# zH2xHCIG1LflVqN$=C)V^JZOr=0)#G%h!@Y4LfWkx*ZbQO@oD~MUSF?OQu7hs^7cBN zNpEK{MBz8+Xt}>IbXP-{zCBnp6aN1J2(LEWYYbx8EJhcMQQgPQ|?IP;k};*iJ5 zTu$@=?Q-e=nH(>3x)?2S&|kH(kV}rg$*o)om#H+!3yE9a${COu>S0P`Zl633zg8U5 zBH`_Db}-tim4s`;ufXx#9JvA=2;!9A!u^$lm?g-x%3N+Hjw|GD!NZxHopt(*OcuVv zA#-@~^Ih^f4#~XoKjPt|+{2(3atb_pKRz`F1ux(v@+Mh zx$g0MnSy)iK8PCnh}Xv%?lbRffg;<6?DC6VI@?U_$S#%;b|kkdck-4aN8dU8JDQ1CW(n82k**gtABO<@ShQA^SW(HxR;|;m;Y~N}bH(39h&NGxJIZVlndbmO*&hBSJre zc_naA&U}7}*{aXrTe&E#J60-IA0R7Jy7hp^WkAxyJ7qrKmEo}+V_2lUt`r9 zi%qljBtfrE$U11~6Gr|aSB!XNr4R}CXrVg6NIgP*OZkOLo-qQZ918Nwd8xZUBY{5X zR6woQPv2MRx38zORTv9*@UuOTLshnrnP*CZ{SdQ za_xaB^yTY7^s_m>f2P^2e=?f}=P?C=Pm;m@kj&~xOL5RwmHf)7$v*Rvl)&ba3}g9e zvYOMH)7McIyDqRu+&8JDg(N>zb8c*x;q}g6?-74!X8t0dlfKE^R>WI^a)Y=a#WZQ` zLZQ9Dl@}U}F758j>+T#tQTCYuy(RT7UTOL=0!unQ+oXv0!rFGWNoIHOr`Yir>5P#~ zyE89@Z;~e6Luun-vM3D7pTkX`+HCCa5MvJ5y9@YLSC03RMZGvoPNf!KE-4nMnq|GB zgSOem|3RYvAXL|)$iwS~a#+VeQZPP9qJu=?hMkZ9p&Pn%40vIw^SNC%Cla6R<6n?< zUy$-+vhdQeuVj%m7v^&O+Pug^RQm)UQ=Wru#$mz1XO`WW#z&l<&X{dVA97leB z9^O%x+l%+k2ZBm|KEFL{{OL)ZPfuF@2U)&{S9ajnXbTgBB0#m?yHxB1$MuwEd@1Q% zN=#)!8LM6CaJutd1;R>pd^zb{PSo$Q4!aB7EPN59F^Z7Omp^WZTV;qtj#^$9(2tRU zysk*p;Tn&SRS{DB;beuwDcWhKdVG7_s_k(a+-U}Sm!2mwhe)wAmDBO3na-yfx5EKR z0iFnWd3e&y5K!b8BD%cd%JE?m#Tm3{P7M7IyAA$SZ??mc<3(b60iCg{%Ay`$!Wx#a z8Yw<&>A**7ZGmVnu=^b2x5Pzni7V-cjnF=Rk9_Mr@*Rj~LXqkkUpr-J?G)k=Z-`6F zw>$CyQ}&IYm^5@^lE#kZqB|BiDszcj8JVyb&HNZ;&4x*8;b)U!-*mECsj_~(L~4@h zXYG-J^VC$OfWJ_rQf7{*LE%j*{LGtY zq?)_jjDw0vJYCKq#m+JV30nLO6ip9Ws1UACUe4%ysD8dPbZGp=CHN?QafzZaFuPnY zGxN6@AiE>|6{mSxiEk56^UlmELU$tZF&9p}z&AX`7#0nuw;5hel(+eh@nV^ZvP!zm zj~fJwWeuzctulQsSt;I2baNdKf`uK}T>BeOKTRunl0L6VqPA>xpLoL(M(ZrkrZ1^M zJ2b?0KoC~fP+D1L>M!3xLTeM5U(iZw4|g!*j2YxZF@s5-h8JLeJenPLyAv)NxW>FV z1H;J@{!msuOmk-{svytjAGC@tfd7z-zSRF^lKISbe$P}=Mt_1ks-LE3^A)q`0$#tr znkqfA4lmW^AdncadhYSSn56b)>r$%wI^kMVCSHlz%6chD=1t@4P@Tjx#q@ zHkR{g(`e~V^vhAe{QXuawb+at7UgmYFehb@_Xg@QmIb2J*jQQGXe`H0KG)cu*B6<~ zcsrSbtB#a2z<}RVPO6!E92LPh>6*1j$mO+Y2=HNu#n?*+a~&WqT1o^$zsYoh_oG3~K_NHzI37W8!A2*?b^_m~{do z5tN>iCb3m^UCZm6wTKYIQg0vgp|9|72-oQCSuD~i_};9vnqXH1wNjA&DWxaeF&H#v z&lBg1t=(u|!^?!>?p!n(qQ-1>wx&H441xHh$dUDRQMm|CHBPZ0{g;p5Qw3E3_-l}d z4WFFi`3u!tOIov*>vv@8&!mvmjv>s=#%rA?_tNlAuSrN-Ww}iw1*SEW51Fsk0AtbL9i}hs{^)UGX}Zi@Nf%8gb*+!=W-9;| z-qS~CXIU1ZC#erJr#G+)_a(ShXj4|+KO_b?B**ulKaehVn==M z??L~}-`^8l#XrYw_hWJLS5%Zcr6}eKx2K?@qQp~CQDCpYU(l{8b(g@k2J~N(u<20i z&P3K+-AKEaK7WK5+T*vap_aA6RvXD1-HcI4#w~Ap7(>L)^<$|i#&;A8F4hWp@wM^+uVPX4@jyH+_Yzd#mRUfe^oa$cTd zH>@Z%JmJLd^7XA6<3v!Dc&gsSp?C_|`DqEsf27~1(a-G6i-xOn^gLBa_BQ_XRtz3s zDnWG+UL8ImEK5i(Q^fi^R+cl#1u^FKUh4>njWu&A+$k^}JNx{hK^wiQL6H{=_0qT-*>)63clRmnrY`T1bo#T| zVhvydF>>N$W0`2YaP=;<4PvPCasGAS4ArjTbGe;Y#DUut$TgHZc;9pu>I~u2t?UeA z3D9G>8$=N_fes7<^n2HEL`iviol$as41{6)p`0xZH6ps-K$@1+!J-4Fb8H4OT3l{= z-#l3`mxk(8(5@iW_?oQ=*49SP&lAK%Jm30|^AAKd1Hisq5y^)$jE&0I6f=wi zxw;l@b}yuwkeJ+9c5SB~7BK2=KMt{>DV9i=7*W~Nd8)46P6 zxY&P~rT!Ld7jnma?0r5qJJ;CX59x7%&EugbSXBVMlo)-?<1=Qva*QsgG24+-m=}iZ zMWjbD8T2@E>5l6Gbt|#4uriFS#9%SnCof8|d4)@*LAiIIWi)of&9>GLXc6^(yYMYIK%CvF(2HWKi;RYKDOsKc4CM||VPSlwE%g5+lcZdHjw%wq;wowkOgc z30swft=PsIX-1=wWQ=2D6KoDoFJTKC+(@?ZBYp_k)?$I#;uhjU1Ezr{O(3mX9!XDt zQ_{06n{8;>ODpgQ#^iQ5B+1dJCJn|hg5m)Mnt3XyX|A1I-w;^#PjMR9x(2!neumvM1h!*dscDE@Il|t;v zjMS4Em9m1jTP2WZLaM`G!Hyc@M-8>`NBa|fu>mQ7$G;lV17FNpwaybT<=tgYuob8L}@ZAz!z2dH9M6RuPqmscac!#Wf z5ey<(mZJg3(S6vcT?tYECQ(?$>(K-e;C*QwI&*>#N$zH%u@hOT6In`z{DX;B1cjHqCJkj=*vib*%1q@Z zDZd0teSmSsEpT)1CF@0tiFWvSDZuvUrT6CnBJaD1G*oud@a>ZZ{g8$gVc_4`hoAu%rhxf1)>$0KZ^z5m8w#pUUNZ=ZivN)u%6`3WSqZX zL3h$rANx^d`bU*9xAgryTcjMg&_G0Ns$ti1;@5I)L3FWJj2>G1NbmI@SR#@P7^tZ3 zM$8-^B4EL5{*Jg$p<7J_L0{OHbbJO;F2L3V(rW_l*O7kTD}@76SPB9~Eo?J$|I3Q_ zbP=y2SnZ1?{YpquBsIh?msVOg@##$4ZQWV3^LY2IHhGs+31TPf~sVNRQ3Jwc2u1ml7~z2j`rGsXtj}8_e4` zn5Etv^r}_6kRe!-y`?0335qBFLU4@+#=x@*{45{>%N z;ZYtsLJcpYthG#Sds)bk3K=4Z{-GV=N>KpHK*?d7r2rOzP(+xlVJ{fgzhEees(wss z!_`<|EQ(5zsDu(S?6u4-uVsqo)gl#-R&db@nW*jI3S zVVAh%CC+?{9uJvL=^`tkLtcDg)TBgrvz*ND8x*Bb9QntK{re;!JI zF6BeV4do-J=ob1>-%V2U=VBuidN!V&9?u4tv3IOM2_vXxk7aCmEW`Px!-o5B06HDp zSv zSw+!kMUnG{$%d>$7{nuNXK89@>6y|3$Vx~xU+bX1E1<8=M!(O>PG@Bs(VLxraFd`z z{64GCiPz`Yd-!0Oy<;DJ2TxDg-w|uWW_EP21}@#eUB61YE7sYmubi&4v-|Q>_vItu z-PW7%4-5!*L8K3*^UK4@!&+yMKsxITtFeYoHCu~9i)o(uWmgGy<^JS2Agp%Oz_df$9^ z8Qp7E>{J||?Kvu)c(vZrwoyv-^ouV)vSkhNzAZUYK4f4%MQ;V^PZmh0>ieF9xa>UW z^YqO{X3UzyPm#i=(hhq6qipQpg|UOC6|tcn;hA3N9a-0g540H(Jy;#uZQ1N{|E1=( zEZnW+8m(`&u!E+>Lp@!Y^_=7mSwLjar{=f)2e!&utPK# z(%9IZqI<{Th|wM+ncj~^iF+-#pzu@Ms-d>@FvjlbD}}Q$>27s%+N~64SuzBB;e*PB zgl!2ttnlm5p3K}^R6P_D!w1;%Ejt(4lerv?iv`Po?(mOw|qlFJt+_zge^d_o$ ze(f7{zAXG*1klxA&*OeWg?cfna@3oVuq;mwr(-U9=q;W6zkH#kFV>D(IQz%+#mJ$b zZQbg&NEfRMT+JvxMQ=A${rAGE@w&B5b?dLZC2k7Q)wT4aE(k5%wN&dgA=RT_{N^IQ zuETS)yL6Mcr*5jEAU*#ej}D-mh3BS6qCc*F_ee!(zV4#rQ$|FK!bGF^58&1{3QItkGTbp8|a>p<9`;coTOVH7-$Q`IEqd~^-YsV}6c_3TjM@S#Neq!IiNS{`XXMv_Zk=gt7V zk`#V$6G=~?{{MEqW4PW~T_v3hmRr{T%m9Z@zwnm_gb6R69UpIpL(ET4*Obg)oyCzc zd~);w%!AK6heE7AaXEPB&hJFF?r{D?E%#juPVASLE};u+6-A}|03Kpt z6;IUi13lJfmi!HydWt!&)2T)CBhUZbxVm{YZM%}$BP}Q99rRe8C%K^TpN4iH()fmU z@%Goaa}(o>D`z0kl~WLC`pQ;_5FJDGPZx!!mY3kHOmm=nYx1Vy(Gxo}Cp^z+%*=&Q zWKi6^U>Y{f?A(0&6mD90<2JJ5T*B{h{}ozS{OXhB_Ww$*ekt=F$DCh;khgg#iQ(Gl?Dwk2YX2a<<`@Bc$U}vfNtg88gpLjl8XF|I5fN*-V zq>+LZFklbThZ|9%IEP;nqi2_rwyjQD?7>uSAxG~Tb37=8)6q4UGlv3NilL>MGisg@ z?pupj&#PNUhLr2?)l~EJakKEIS@H9vb^Fdf+HDk`yy}_A7p|FqxU-M3+UZ{}tJP8I zr(OZLMdGnUn?)@ran+z15<=$o$~CA+;hW;GJ4snlT0(8YE_tUO-l zd=6eCR)YUg>#hz3a#HQnP5ExXdC*oo$JvYQOT5M+tH~b9B1ZevTC4_qApX%Oe$(YO zR@>~r=}i^HnW;Q8T|B1WR)UWZYsh{$SUYvdias*L&?XQJBB;XALu{pr1V7%rt|BXc z?@MMjZ~Chan_&S&i)x$kySMscdV8BO|JLX#{nhm12P=xKo{Q1e7S%q#9`Qj`1>)D=J9{2~$+xC6&Rp>orW^wH47ubV! zBg7WHchG#jN!Jt|6|)cez&YS+ZR><$5R`(hGTl+u<|^4V^>^76BW0wV{Kh~Cs_e-B E53ICynE(I) literal 0 HcmV?d00001 From feea154e89bc24d0b408c6927230b4987a6c4357 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 25 Nov 2021 11:02:38 +0100 Subject: [PATCH 048/176] remove working dir after test --- .../eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala index 50fe73d9a..477a4326a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala @@ -4,11 +4,12 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest import eu.dnetlib.dhp.schema.oaf.Oaf +import org.apache.commons.io.FileUtils import org.apache.spark.SparkConf import org.apache.spark.sql.functions.{col, count} import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.junit.jupiter.api.extension.ExtendWith -import org.junit.jupiter.api.{BeforeEach, Test} +import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} import org.mockito.junit.jupiter.MockitoExtension import org.slf4j.{Logger, LoggerFactory} @@ -20,7 +21,7 @@ import org.junit.jupiter.api.Assertions._ @ExtendWith(Array(classOf[MockitoExtension])) class DataciteToOAFTest extends AbstractVocabularyTest{ - var workingDir:Path= null + private var workingDir:Path = null val log: Logger = LoggerFactory.getLogger(getClass) @BeforeEach @@ -30,6 +31,11 @@ class DataciteToOAFTest extends AbstractVocabularyTest{ super.setUpVocabulary() } + @AfterEach + def tearDown() :Unit = { + FileUtils.deleteDirectory(workingDir.toFile) + } + @Test def testDateMapping:Unit = { From 5fd0e610bf1cb1cd6f8e80e047602fba2703ddce Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 25 Nov 2021 12:10:45 +0100 Subject: [PATCH 049/176] [DOIBOOST Process] fix filtering to filter results with non null id --- .../src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index 016279787..0ea4b66a4 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -146,7 +146,7 @@ object SparkProcessMAG { .save(s"$workingPath/mag_publication") spark.read.load(s"$workingPath/mag_publication").as[Publication] - .filter(p => p.getId == null) + .filter(p => p.getId != null) .groupByKey(p => p.getId) .reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) .map(_._2) From 1e1f5e4fe08be8bc81d23e1c1d40d0886fef00f4 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 25 Nov 2021 13:03:17 +0100 Subject: [PATCH 050/176] minor fix --- dhp-common/pom.xml | 37 +++++++++++++++++++ .../AbstractScalaApplication.scala | 37 ------------------- .../application/SparkScalaApplication.scala | 37 +++++++++++++++++++ 3 files changed, 74 insertions(+), 37 deletions(-) delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 4a13b3459..7b18f0105 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -21,6 +21,43 @@ This module contains common utilities meant to be used across the dnet-hadoop submodules + + + + net.alchim31.maven + scala-maven-plugin + ${net.alchim31.maven.version} + + + scala-compile-first + initialize + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + scala-doc + process-resources + + doc + + + + + ${scala.version} + + + + + diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala b/dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala deleted file mode 100644 index 44dad93eb..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala +++ /dev/null @@ -1,37 +0,0 @@ -package eu.dnetlib.dhp.application - -import org.apache.spark.SparkConf -import org.apache.spark.sql.SparkSession -import org.slf4j.Logger - -abstract class AbstractScalaApplication (val propertyPath:String, val args:Array[String], log:Logger) extends SparkScalaApplication { - - var parser: ArgumentApplicationParser = null - - var spark:SparkSession = null - - - def initialize():SparkScalaApplication = { - parser = parseArguments(args) - spark = createSparkSession() - this - } - - /** - * Utility for creating a spark session starting from parser - * - * @return a spark Session - */ - private def createSparkSession():SparkSession = { - require(parser!= null) - - val conf:SparkConf = new SparkConf() - val master = parser.get("master") - log.info(s"Creating Spark session: Master: $master") - SparkSession.builder().config(conf) - .appName(getClass.getSimpleName) - .master(master) - .getOrCreate() - } - -} \ No newline at end of file diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala b/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala index 247bacac0..6541746b2 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala @@ -33,3 +33,40 @@ trait SparkScalaApplication { */ def run(): Unit } + + +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession +import org.slf4j.Logger + +abstract class AbstractScalaApplication (val propertyPath:String, val args:Array[String], log:Logger) extends SparkScalaApplication { + + var parser: ArgumentApplicationParser = null + + var spark:SparkSession = null + + + def initialize():SparkScalaApplication = { + parser = parseArguments(args) + spark = createSparkSession() + this + } + + /** + * Utility for creating a spark session starting from parser + * + * @return a spark Session + */ + private def createSparkSession():SparkSession = { + require(parser!= null) + + val conf:SparkConf = new SparkConf() + val master = parser.get("master") + log.info(s"Creating Spark session: Master: $master") + SparkSession.builder().config(conf) + .appName(getClass.getSimpleName) + .master(master) + .getOrCreate() + } + +} \ No newline at end of file From 29f69f2f89d55213b2e07aa618b9113299c76939 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 26 Nov 2021 15:22:04 +0200 Subject: [PATCH 051/176] Sprint 4 --- .../scripts/step16-createIndicatorsTables.sql | 405 ++++++++++-------- 1 file changed, 237 insertions(+), 168 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 0ea4a5adc..15f2f0996 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -8,7 +8,7 @@ join result_instance ri on ri.id = p.id join datasource on datasource.id = ri.hostedby where datasource.type like '%Repository%' and (ri.accessright = 'Open Access' -or ri.accessright = 'Embargo')) tmp +or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp on p.id= tmp.id; create table indi_pub_grey_lit stored as parquet as @@ -41,178 +41,178 @@ join datasource on datasource.id = ri.hostedby where datasource.id like '%doajarticles%') tmp on p.id= tmp.id; -create table indi_project_pubs_count stored as parquet as -select pr.id id, count(p.id) total_pubs from project_results pr -join publication p on p.id=pr.result -group by pr.id; +--create table indi_project_pubs_count stored as parquet as +--select pr.id id, count(p.id) total_pubs from project_results pr +--join publication p on p.id=pr.result +--group by pr.id; -create table indi_project_datasets_count stored as parquet as -select pr.id id, count(d.id) total_datasets from project_results pr -join dataset d on d.id=pr.result -group by pr.id; +--create table indi_project_datasets_count stored as parquet as +--select pr.id id, count(d.id) total_datasets from project_results pr +--join dataset d on d.id=pr.result +--group by pr.id; -create table indi_project_software_count stored as parquet as -select pr.id id, count(s.id) total_software from project_results pr -join software s on s.id=pr.result -group by pr.id; +--create table indi_project_software_count stored as parquet as +--select pr.id id, count(s.id) total_software from project_results pr +--join software s on s.id=pr.result +--group by pr.id; -create table indi_project_otherresearch_count stored as parquet as -select pr.id id, count(o.id) total_other from project_results pr -join otherresearchproduct o on o.id=pr.result -group by pr.id; +--create table indi_project_otherresearch_count stored as parquet as +--select pr.id id, count(o.id) total_other from project_results pr +--join otherresearchproduct o on o.id=pr.result +--group by pr.id; -create table indi_pub_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM publication p - join result_organization ro on p.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_pub_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA + --from + --(SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 + --ELSE 0 + --END) AS OpenAccess, SUM(CASE + --WHEN bestlicence<>'Open Access' THEN 1 + --ELSE 0 + --END) AS NonOpenAccess + --FROM publication p + --join result_organization ro on p.id=ro.id + --join organization o on o.id=ro.organization + --where cast(year as int)>=2003 and cast(year as int)<=2021 + --group by year, country) tmp; -create table indi_dataset_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM dataset d - join result_organization ro on d.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_dataset_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +--from + --(SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 + --ELSE 0 + --END) AS OpenAccess, SUM(CASE + --WHEN bestlicence<>'Open Access' THEN 1 + --ELSE 0 + --END) AS NonOpenAccess + --FROM dataset d + --join result_organization ro on d.id=ro.id + --join organization o on o.id=ro.organization + --where cast(year as int)>=2003 and cast(year as int)<=2021 + --group by year, country) tmp; -create table indi_software_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM software s - join result_organization ro on s.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_software_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +--from +-- (SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 +-- ELSE 0 +--END) AS OpenAccess, SUM(CASE +-- WHEN bestlicence<>'Open Access' THEN 1 +-- ELSE 0 +-- END) AS NonOpenAccess +-- FROM software s +-- join result_organization ro on s.id=ro.id +-- join organization o on o.id=ro.organization +-- where cast(year as int)>=2003 and cast(year as int)<=2021 +-- group by year, country) tmp; -create table indi_other_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM otherresearchproduct orp - join result_organization ro on orp.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_other_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +-- from +-- (SELECT year, country, SUM(CASE +-- WHEN bestlicence='Open Access' THEN 1 +-- ELSE 0 +-- END) AS OpenAccess, SUM(CASE +-- WHEN bestlicence<>'Open Access' THEN 1 +-- ELSE 0 +-- END) AS NonOpenAccess +-- FROM otherresearchproduct orp +-- join result_organization ro on orp.id=ro.id +-- join organization o on o.id=ro.organization +-- where cast(year as int)>=2003 and cast(year as int)<=2021 +-- group by year, country) tmp; -create table indi_pub_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join publication p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofpubs -from total; +--create table indi_pub_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join publication p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofpubs +--from total; -create table indi_dataset_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join dataset p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofdataset -from total; +--create table indi_dataset_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join dataset p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofdataset +--from total; -create table indi_software_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join software p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofsoftware -from total; +--create table indi_software_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join software p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofsoftware +--from total; -create table indi_other_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join otherresearchproduct p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofother -from total; +--create table indi_other_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join otherresearchproduct p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofother +--from total; -create table indi_other_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from otherresearchproduct_datasources pd -join datasource d on datasource=d.id -join otherresearchproduct p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct -from total; +--create table indi_other_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from otherresearchproduct_datasources pd +--join datasource d on datasource=d.id +--join otherresearchproduct p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct +--from total; -create table indi_software_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from software_datasources pd -join datasource d on datasource=d.id -join software p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfSoftware -from total; +--create table indi_software_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from software_datasources pd +--join datasource d on datasource=d.id +--join software p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfSoftware +--from total; -create table indi_dataset_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from dataset_datasources pd -join datasource d on datasource=d.id -join dataset p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfDatasets -from total; +--create table indi_dataset_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from dataset_datasources pd +--join datasource d on datasource=d.id +--join dataset p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfDatasets +--from total; -create table indi_pub_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from publication_datasources pd -join datasource d on datasource=d.id -join publication p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfPubs -from total; +--create table indi_pub_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from publication_datasources pd +--join datasource d on datasource=d.id +--join publication p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfPubs +--from total; create table indi_pub_has_cc_licence stored as parquet as select distinct p.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license @@ -231,11 +231,40 @@ join publication_licenses as license on license.id = p.id WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp on p.id= tmp.id; +-- EOSC-TR1.1-02M: +-- ## Indicator: has_cc_license. Creative Commons licensing has become a +-- de facto standard in scholarly communication and is promoted by many initiatives +-- like Plan S. This indicator might be only useful when applied +-- to openly available publications. +create table indi_pub_has_cc_licence_tr stored as parquet as +select distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_tr +from publication p +left outer join (select p.id, license.type as lic from publication p +join publication_licenses as license on license.id = p.id +where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +on p.id= tmp.id + +-- #EOSC-F2-01M_cc Rich metadata for scholarly publications +-- ## Indicator: has_cc_license. Creative Commons licensing has become a +-- de facto standard in scholarly communication and is promoted by many initiatives +-- like Plan S. This indicator might be only useful when applied +-- to openly available publications. + +-- Same indicator as EOSC-TR1.1-02M (Najko's instructions) +-- create table indi_pub_has_cc_licence_f stored as parquet as +-- select +-- distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_f +-- from publication p +-- left outer join (selectp.id,license.type as lic from publication p +-- join publication_licenses as license on license.id = p.id +-- where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +-- on p.id= tmp.id + create table indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract from publication; -create table indi_with_orcid stored as parquet as +create table indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid from result r left outer join (select id, 1 as has_orcid from result_orcid) tmp @@ -270,13 +299,53 @@ join tmp as o2 on o1.result=o2.result where o1.id<>o2.id group by o1.id, o2.id, o1.type -create table indi_result_org_country_collab stored as parquet as -with tmp as -(select o.id as id, o.country , ro.id as result,r.type from organization o -join result_organization ro on o.id=ro.organization -join result r on r.id=ro.id where o.country <> 'UNKNOWN') -select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations -from tmp as o1 -join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country +create table indi_pub_diamond stored as parquet as +select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal +from publication_datasources pd +left outer join ( +select pd.id, 1 as in_diamond_journal from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp +on pd.id=tmp.id + +create table indi_pub_hybrid stored as parquet as +select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid +from publication_datasources pd +left outer join ( +select pd.id, 1 as is_hybrid from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp +on pd.id=tmp.id + +create table indi_is_gold_oa stored as parquet as +(select distinct pd.id, coalesce(gold_oa, 0) as gold_oa +from publication_datasources pd +left outer join ( +select pd.id, 1 as gold_oa from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps on (ps.issn_print=d.issn_printed or ps.issn_online=d.issn_online) +where ps.journal_is_in_doaj is true or ps.journal_is_oa is true) tmp +on pd.id=tmp.id) + + +create table indi_pub_in_transformative stored as parquet as +select distinct pd.id, coalesce(is_transformative, 0) as is_transformative +from publication pd +left outer join ( +select pd.id, 1 as is_transformative from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and ps.is_transformative_journal=true) tmp +on pd.id=tmp.id + +create table indi_pub_closed_other_open stored as parquet as +select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri +left outer join +(select ri.id, 1 as pub_closed_other_open from result_instance ri +join publication p on p.id=ri.id +join datasource d on ri.hostedby=d.id +where d.type like '%Journal%' and ri.accessright='Closed Access' and +(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp +on tmp.id=ri.id \ No newline at end of file From 0b4163ee0b3bec77bafe8811b7ee4f7f2a02a7cb Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Fri, 26 Nov 2021 15:58:01 +0200 Subject: [PATCH 052/176] added sprint3,4, removed 2, chaos --- .../scripts/step20-createMonitorDB.sql | 34 ++++++++----------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index f4fb2a174..fa8e4c6a7 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -105,23 +105,6 @@ create table TARGET.project_results stored as parquet as select id as result, pr compute stats TARGET.project_results; -- indicators -create view TARGET.indi_dataset_avg_year_content_oa as select * from SOURCE.indi_dataset_avg_year_content_oa orig; -create view TARGET.indi_dataset_avg_year_context_oa as select * from SOURCE.indi_dataset_avg_year_context_oa orig; -create view TARGET.indi_dataset_avg_year_country_oa as select * from SOURCE.indi_dataset_avg_year_country_oa orig; - -create view TARGET.indi_other_avg_year_content_oa as select * from SOURCE.indi_other_avg_year_content_oa orig; -create view TARGET.indi_other_avg_year_context_oa as select * from SOURCE.indi_other_avg_year_context_oa orig; -create view TARGET.indi_other_avg_year_country_oa as select * from SOURCE.indi_other_avg_year_country_oa orig; - -create view TARGET.indi_project_datasets_count as select * from SOURCE.indi_project_datasets_count orig; -create view TARGET.indi_project_otherresearch_count as select * from SOURCE.indi_project_otherresearch_count orig; -create view TARGET.indi_project_pubs_count as select * from SOURCE.indi_project_pubs_count orig; -create view TARGET.indi_project_software_count as select * from SOURCE.indi_project_software_count orig; - -create view TARGET.indi_pub_avg_year_content_oa as select * from SOURCE.indi_pub_avg_year_content_oa orig; -create view TARGET.indi_pub_avg_year_context_oa as select * from SOURCE.indi_pub_avg_year_context_oa orig; -create view TARGET.indi_pub_avg_year_country_oa as select * from SOURCE.indi_pub_avg_year_country_oa orig; - create table TARGET.indi_pub_green_oa stored as parquet as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_green_oa; create table TARGET.indi_pub_grey_lit stored as parquet as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id); @@ -137,9 +120,20 @@ compute stats TARGET.indi_pub_has_cc_licence; create table TARGET.indi_pub_has_cc_licence_url stored as parquet as select * from SOURCE.indi_pub_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_has_cc_licence_url; -create view TARGET.indi_software_avg_year_content_oa as select * from SOURCE.indi_software_avg_year_content_oa orig; -create view TARGET.indi_software_avg_year_context_oa as select * from SOURCE.indi_software_avg_year_context_oa orig; -create view TARGET.indi_software_avg_year_country_oa as select * from SOURCE.indi_software_avg_year_country_oa orig; +create view TARGET.indi_funder_country_collab stored as select * from SOURCE.indi_funder_country_collab; + +create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_result_with_orcid; +create table TARGET.indi_funded_result_with_fundref stored as parquet as select * from SOURCE.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_funded_result_with_fundref; +create table TARGET.indi_pub_diamond stored as parquet as select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_diamond; +create table TARGET.indi_pub_hybrid stored as parquet as select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_hybrid; +create table TARGET.indi_pub_in_transformative stored as parquet as select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_in_transformative; +create table TARGET.indi_pub_closed_other_open stored as parquet as select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_closed_other_open; --denorm alter table TARGET.result rename to TARGET.res_tmp; From 25fc8abf77c512e650164972ba5cc5ade366b112 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 26 Nov 2021 15:22:04 +0200 Subject: [PATCH 053/176] Sprint 4 --- .../scripts/step16-createIndicatorsTables.sql | 405 ++++++++++-------- 1 file changed, 237 insertions(+), 168 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 0ea4a5adc..15f2f0996 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -8,7 +8,7 @@ join result_instance ri on ri.id = p.id join datasource on datasource.id = ri.hostedby where datasource.type like '%Repository%' and (ri.accessright = 'Open Access' -or ri.accessright = 'Embargo')) tmp +or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp on p.id= tmp.id; create table indi_pub_grey_lit stored as parquet as @@ -41,178 +41,178 @@ join datasource on datasource.id = ri.hostedby where datasource.id like '%doajarticles%') tmp on p.id= tmp.id; -create table indi_project_pubs_count stored as parquet as -select pr.id id, count(p.id) total_pubs from project_results pr -join publication p on p.id=pr.result -group by pr.id; +--create table indi_project_pubs_count stored as parquet as +--select pr.id id, count(p.id) total_pubs from project_results pr +--join publication p on p.id=pr.result +--group by pr.id; -create table indi_project_datasets_count stored as parquet as -select pr.id id, count(d.id) total_datasets from project_results pr -join dataset d on d.id=pr.result -group by pr.id; +--create table indi_project_datasets_count stored as parquet as +--select pr.id id, count(d.id) total_datasets from project_results pr +--join dataset d on d.id=pr.result +--group by pr.id; -create table indi_project_software_count stored as parquet as -select pr.id id, count(s.id) total_software from project_results pr -join software s on s.id=pr.result -group by pr.id; +--create table indi_project_software_count stored as parquet as +--select pr.id id, count(s.id) total_software from project_results pr +--join software s on s.id=pr.result +--group by pr.id; -create table indi_project_otherresearch_count stored as parquet as -select pr.id id, count(o.id) total_other from project_results pr -join otherresearchproduct o on o.id=pr.result -group by pr.id; +--create table indi_project_otherresearch_count stored as parquet as +--select pr.id id, count(o.id) total_other from project_results pr +--join otherresearchproduct o on o.id=pr.result +--group by pr.id; -create table indi_pub_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM publication p - join result_organization ro on p.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_pub_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA + --from + --(SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 + --ELSE 0 + --END) AS OpenAccess, SUM(CASE + --WHEN bestlicence<>'Open Access' THEN 1 + --ELSE 0 + --END) AS NonOpenAccess + --FROM publication p + --join result_organization ro on p.id=ro.id + --join organization o on o.id=ro.organization + --where cast(year as int)>=2003 and cast(year as int)<=2021 + --group by year, country) tmp; -create table indi_dataset_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM dataset d - join result_organization ro on d.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_dataset_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +--from + --(SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 + --ELSE 0 + --END) AS OpenAccess, SUM(CASE + --WHEN bestlicence<>'Open Access' THEN 1 + --ELSE 0 + --END) AS NonOpenAccess + --FROM dataset d + --join result_organization ro on d.id=ro.id + --join organization o on o.id=ro.organization + --where cast(year as int)>=2003 and cast(year as int)<=2021 + --group by year, country) tmp; -create table indi_software_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM software s - join result_organization ro on s.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_software_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +--from +-- (SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 +-- ELSE 0 +--END) AS OpenAccess, SUM(CASE +-- WHEN bestlicence<>'Open Access' THEN 1 +-- ELSE 0 +-- END) AS NonOpenAccess +-- FROM software s +-- join result_organization ro on s.id=ro.id +-- join organization o on o.id=ro.organization +-- where cast(year as int)>=2003 and cast(year as int)<=2021 +-- group by year, country) tmp; -create table indi_other_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM otherresearchproduct orp - join result_organization ro on orp.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_other_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +-- from +-- (SELECT year, country, SUM(CASE +-- WHEN bestlicence='Open Access' THEN 1 +-- ELSE 0 +-- END) AS OpenAccess, SUM(CASE +-- WHEN bestlicence<>'Open Access' THEN 1 +-- ELSE 0 +-- END) AS NonOpenAccess +-- FROM otherresearchproduct orp +-- join result_organization ro on orp.id=ro.id +-- join organization o on o.id=ro.organization +-- where cast(year as int)>=2003 and cast(year as int)<=2021 +-- group by year, country) tmp; -create table indi_pub_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join publication p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofpubs -from total; +--create table indi_pub_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join publication p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofpubs +--from total; -create table indi_dataset_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join dataset p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofdataset -from total; +--create table indi_dataset_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join dataset p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofdataset +--from total; -create table indi_software_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join software p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofsoftware -from total; +--create table indi_software_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join software p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofsoftware +--from total; -create table indi_other_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join otherresearchproduct p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofother -from total; +--create table indi_other_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join otherresearchproduct p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofother +--from total; -create table indi_other_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from otherresearchproduct_datasources pd -join datasource d on datasource=d.id -join otherresearchproduct p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct -from total; +--create table indi_other_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from otherresearchproduct_datasources pd +--join datasource d on datasource=d.id +--join otherresearchproduct p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct +--from total; -create table indi_software_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from software_datasources pd -join datasource d on datasource=d.id -join software p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfSoftware -from total; +--create table indi_software_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from software_datasources pd +--join datasource d on datasource=d.id +--join software p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfSoftware +--from total; -create table indi_dataset_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from dataset_datasources pd -join datasource d on datasource=d.id -join dataset p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfDatasets -from total; +--create table indi_dataset_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from dataset_datasources pd +--join datasource d on datasource=d.id +--join dataset p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfDatasets +--from total; -create table indi_pub_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from publication_datasources pd -join datasource d on datasource=d.id -join publication p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfPubs -from total; +--create table indi_pub_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from publication_datasources pd +--join datasource d on datasource=d.id +--join publication p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfPubs +--from total; create table indi_pub_has_cc_licence stored as parquet as select distinct p.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license @@ -231,11 +231,40 @@ join publication_licenses as license on license.id = p.id WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp on p.id= tmp.id; +-- EOSC-TR1.1-02M: +-- ## Indicator: has_cc_license. Creative Commons licensing has become a +-- de facto standard in scholarly communication and is promoted by many initiatives +-- like Plan S. This indicator might be only useful when applied +-- to openly available publications. +create table indi_pub_has_cc_licence_tr stored as parquet as +select distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_tr +from publication p +left outer join (select p.id, license.type as lic from publication p +join publication_licenses as license on license.id = p.id +where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +on p.id= tmp.id + +-- #EOSC-F2-01M_cc Rich metadata for scholarly publications +-- ## Indicator: has_cc_license. Creative Commons licensing has become a +-- de facto standard in scholarly communication and is promoted by many initiatives +-- like Plan S. This indicator might be only useful when applied +-- to openly available publications. + +-- Same indicator as EOSC-TR1.1-02M (Najko's instructions) +-- create table indi_pub_has_cc_licence_f stored as parquet as +-- select +-- distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_f +-- from publication p +-- left outer join (selectp.id,license.type as lic from publication p +-- join publication_licenses as license on license.id = p.id +-- where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +-- on p.id= tmp.id + create table indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract from publication; -create table indi_with_orcid stored as parquet as +create table indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid from result r left outer join (select id, 1 as has_orcid from result_orcid) tmp @@ -270,13 +299,53 @@ join tmp as o2 on o1.result=o2.result where o1.id<>o2.id group by o1.id, o2.id, o1.type -create table indi_result_org_country_collab stored as parquet as -with tmp as -(select o.id as id, o.country , ro.id as result,r.type from organization o -join result_organization ro on o.id=ro.organization -join result r on r.id=ro.id where o.country <> 'UNKNOWN') -select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations -from tmp as o1 -join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country +create table indi_pub_diamond stored as parquet as +select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal +from publication_datasources pd +left outer join ( +select pd.id, 1 as in_diamond_journal from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp +on pd.id=tmp.id + +create table indi_pub_hybrid stored as parquet as +select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid +from publication_datasources pd +left outer join ( +select pd.id, 1 as is_hybrid from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp +on pd.id=tmp.id + +create table indi_is_gold_oa stored as parquet as +(select distinct pd.id, coalesce(gold_oa, 0) as gold_oa +from publication_datasources pd +left outer join ( +select pd.id, 1 as gold_oa from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps on (ps.issn_print=d.issn_printed or ps.issn_online=d.issn_online) +where ps.journal_is_in_doaj is true or ps.journal_is_oa is true) tmp +on pd.id=tmp.id) + + +create table indi_pub_in_transformative stored as parquet as +select distinct pd.id, coalesce(is_transformative, 0) as is_transformative +from publication pd +left outer join ( +select pd.id, 1 as is_transformative from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and ps.is_transformative_journal=true) tmp +on pd.id=tmp.id + +create table indi_pub_closed_other_open stored as parquet as +select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri +left outer join +(select ri.id, 1 as pub_closed_other_open from result_instance ri +join publication p on p.id=ri.id +join datasource d on ri.hostedby=d.id +where d.type like '%Journal%' and ri.accessright='Closed Access' and +(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp +on tmp.id=ri.id \ No newline at end of file From 09fc2afdcad627ec47cedfabf18e946d4036a0db Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 26 Nov 2021 16:13:10 +0200 Subject: [PATCH 054/176] Added indi_funder_country_collab Kept only indi_pub_has_cc_licence --- .../scripts/step16-createIndicatorsTables.sql | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 15f2f0996..926c8825f 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -236,13 +236,13 @@ on p.id= tmp.id; -- de facto standard in scholarly communication and is promoted by many initiatives -- like Plan S. This indicator might be only useful when applied -- to openly available publications. -create table indi_pub_has_cc_licence_tr stored as parquet as -select distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_tr -from publication p -left outer join (select p.id, license.type as lic from publication p -join publication_licenses as license on license.id = p.id -where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp -on p.id= tmp.id +--create table indi_pub_has_cc_licence_tr stored as parquet as +--select distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_tr +--from publication p +--left outer join (select p.id, license.type as lic from publication p +--join publication_licenses as license on license.id = p.id +--where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +--on p.id= tmp.id -- #EOSC-F2-01M_cc Rich metadata for scholarly publications -- ## Indicator: has_cc_license. Creative Commons licensing has become a @@ -299,6 +299,17 @@ join tmp as o2 on o1.result=o2.result where o1.id<>o2.id group by o1.id, o2.id, o1.type +create table indi_funder_country_collab stored as parquet as +with tmp as (select funder, project, country from organization_projects op +join organization o on o.id=op.id +join project p on p.id=op.project +where country <> 'UNKNOWN') +select f1.funder, f1.country, f2.country, count(distinct f1.project) as collaborations +from tmp as f1 +join tmp as f2 on f1.project=f2.project +where f1.country<>f2.country +group by f1.funder, f2.country, f1.country + create table indi_pub_diamond stored as parquet as select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal from publication_datasources pd From 5c6d32853713276f7808b3b3dcff046a447a1017 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 26 Nov 2021 15:38:16 +0100 Subject: [PATCH 055/176] code formatting --- .../oa/graph/clean/GraphCleaningFunctionsTest.java | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index 64fe7749b..c8a368dd6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -225,28 +225,26 @@ public class GraphCleaningFunctionsTest { GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt")); } - - @Test public void testCleanDoiBoost() throws IOException { - String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json")); + String json = IOUtils + .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json")); Publication p_in = MAPPER.readValue(json, Publication.class); Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping); Publication cleaned = GraphCleaningFunctions.cleanup(p_out); - - Assertions.assertEquals(true,GraphCleaningFunctions.filter(cleaned) ); + Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned)); } @Test public void testCleanDoiBoost2() throws IOException { - String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json")); + String json = IOUtils + .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json")); Publication p_in = MAPPER.readValue(json, Publication.class); Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping); Publication cleaned = GraphCleaningFunctions.cleanup(p_out); - - Assertions.assertEquals(true,GraphCleaningFunctions.filter(cleaned) ); + Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned)); } } From 014e872ae1d00a4cf32506e1ab2e204874913e25 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 26 Nov 2021 15:38:56 +0100 Subject: [PATCH 056/176] [resolution wf] added optional parameter to skip the entity resolution --- .../graph/resolution/oozie_app/workflow.xml | 64 ++++++++++++++++++- 1 file changed, 61 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index 031b5a36f..3cd08bc9b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -12,6 +12,11 @@ targetPath the target path after resolution + + shouldResolveEntities + true + allows to activate/deactivate the resolution process over the entities + @@ -42,10 +47,18 @@ --workingPath${workingDir} --targetPath${targetPath} - + + + + ${wf:conf('shouldResolveEntities') eq false} + ${wf:conf('shouldResolveEntities') eq true} + + + + yarn @@ -73,10 +86,55 @@ + + + + + + + + + + ${nameNode}/${graphBasePath}/publication + ${nameNode}/${targetPath}/publication + + + + + + + + ${nameNode}/${graphBasePath}/dataset + ${nameNode}/${targetPath}/dataset + + + + + + + + ${nameNode}/${graphBasePath}/otherresearchproduct + ${nameNode}/${targetPath}/otherresearchproduct + + + + + + + + ${nameNode}/${graphBasePath}/software + ${nameNode}/${targetPath}/software + + + + + + + - + @@ -97,7 +155,7 @@ - + ${nameNode}/${graphBasePath}/datasource ${nameNode}/${targetPath}/datasource From 01e5e0142ac9d0e181e1d62b03c564bf4c15953d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 1 Dec 2021 09:46:26 +0100 Subject: [PATCH 057/176] added test to verify the relation inverse lookup operation --- .../dhp/blacklist/BlacklistRelationTest.java | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java new file mode 100644 index 000000000..8b8c21001 --- /dev/null +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java @@ -0,0 +1,35 @@ +package eu.dnetlib.dhp.blacklist; + +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.common.RelationInverse; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.List; + +public class BlacklistRelationTest { + + @Test + public void testRelationInverseLookup() { + + final List rels = Arrays.asList( + "resultResult_relationship_IsRelatedTo", + "resultOrganization_affiliation_isAuthorInstitutionOf", + "resultOrganization_affiliation_hasAuthorInstitution", + "datasourceOrganization_provision_isProvidedBy", + "projectOrganization_participation_hasParticipant", + "resultProject_outcome_produces", + "resultProject_outcome_isProducedBy"); + + rels.forEach(r -> { + RelationInverse inverse = ModelSupport.relationInverseMap.get(r); + Assertions.assertNotNull(inverse); + Assertions.assertNotNull(inverse.getRelType()); + Assertions.assertNotNull(inverse.getSubReltype()); + Assertions.assertNotNull(inverse.getRelClass()); + }); + + } + +} From 4fe788881796525044517c19d633ff1df017656f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 1 Dec 2021 15:48:15 +0100 Subject: [PATCH 058/176] code formatting --- .../dhp/blacklist/BlacklistRelationTest.java | 49 ++++++++++--------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java index 8b8c21001..160658e5b 100644 --- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java @@ -1,35 +1,38 @@ -package eu.dnetlib.dhp.blacklist; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.common.RelationInverse; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; +package eu.dnetlib.dhp.blacklist; import java.util.Arrays; import java.util.List; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.common.RelationInverse; + public class BlacklistRelationTest { - @Test - public void testRelationInverseLookup() { + @Test + public void testRelationInverseLookup() { - final List rels = Arrays.asList( - "resultResult_relationship_IsRelatedTo", - "resultOrganization_affiliation_isAuthorInstitutionOf", - "resultOrganization_affiliation_hasAuthorInstitution", - "datasourceOrganization_provision_isProvidedBy", - "projectOrganization_participation_hasParticipant", - "resultProject_outcome_produces", - "resultProject_outcome_isProducedBy"); + final List rels = Arrays + .asList( + "resultResult_relationship_IsRelatedTo", + "resultOrganization_affiliation_isAuthorInstitutionOf", + "resultOrganization_affiliation_hasAuthorInstitution", + "datasourceOrganization_provision_isProvidedBy", + "projectOrganization_participation_hasParticipant", + "resultProject_outcome_produces", + "resultProject_outcome_isProducedBy"); - rels.forEach(r -> { - RelationInverse inverse = ModelSupport.relationInverseMap.get(r); - Assertions.assertNotNull(inverse); - Assertions.assertNotNull(inverse.getRelType()); - Assertions.assertNotNull(inverse.getSubReltype()); - Assertions.assertNotNull(inverse.getRelClass()); - }); + rels.forEach(r -> { + RelationInverse inverse = ModelSupport.relationInverseMap.get(r); + Assertions.assertNotNull(inverse); + Assertions.assertNotNull(inverse.getRelType()); + Assertions.assertNotNull(inverse.getSubReltype()); + Assertions.assertNotNull(inverse.getRelClass()); + }); - } + } } From d85af6fc255acf12347b7424bb9d596d63abeefa Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 1 Dec 2021 15:49:15 +0100 Subject: [PATCH 059/176] [cleaning wf] fixed OAF record navigation, a mapping defined on a container object would have prevented the natvigation to continue on its properties --- .../dhp/oa/graph/clean/OafCleaner.java | 5 +++++ .../clean/GraphCleaningFunctionsTest.java | 3 +++ .../dnetlib/dhp/oa/graph/clean/relation.json | 20 +++++++++---------- .../dnetlib/dhp/oa/graph/clean/synonyms.txt | 3 ++- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java index 5502fd391..102a1fa85 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java @@ -30,6 +30,11 @@ public class OafCleaner implements Serializable { } } else if (hasMapping(o, mapping)) { mapping.get(o.getClass()).accept(o); + for (final Field f : getAllFields(o.getClass())) { + f.setAccessible(true); + final Object val = f.get(o); + navigate(val, mapping); + } } else { for (final Field f : getAllFields(o.getClass())) { f.setAccessible(true); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index c8a368dd6..b69d0c08b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -68,6 +68,9 @@ public class GraphCleaningFunctionsTest { Relation r_out = OafCleaner.apply(r_in, mapping); assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_out.getRelClass())); assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_SUBRELTYPE).contains(r_out.getSubRelType())); + + assertEquals("iis", r_out.getDataInfo().getProvenanceaction().getClassid()); + assertEquals("Inferred by OpenAIRE", r_out.getDataInfo().getProvenanceaction().getClassname()); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json index 97764de00..06eb9bae0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json @@ -1,10 +1,10 @@ -{"relType":"resultResult","subRelType":"citation","relClass":"cites","source":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","target":"50|openaire____::007a4870b31056f89b768cf508e1538e"} -{"relType":"resultResult","subRelType":"citation","relClass":"isCitedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"part","relClass":"isPartOf","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"part","relClass":"hasPart","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"review","relClass":"isReviewedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"review","relClass":"reviews","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"relationship","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"publicationDataset","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} \ No newline at end of file +{"relType":"resultResult","subRelType":"citation","relClass":"cites","source":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","target":"50|openaire____::007a4870b31056f89b768cf508e1538e","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"citation","relClass":"isCitedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"part","relClass":"isPartOf","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"part","relClass":"hasPart","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"review","relClass":"isReviewedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"review","relClass":"reviews","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"relationship","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"publicationDataset","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt index 79dc7cd2d..09bd58aeb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt @@ -1241,4 +1241,5 @@ dnet:relation_relClass @=@ Reviews @=@ reviews dnet:relation_relClass @=@ IsSupplementTo @=@ isSupplementTo dnet:relation_relClass @=@ IsSupplementedBy @=@ isSupplementedBy dnet:relation_relClass @=@ IsRelatedTo @=@ isRelatedTo -dnet:relation_subRelType @=@ relationship @=@ publicationDataset \ No newline at end of file +dnet:relation_subRelType @=@ relationship @=@ publicationDataset +dnet:provenanceActions @=@ iis @=@ erroneous label to be cleaned \ No newline at end of file From cfa456076908aca3a828bec81a34bf5a40788f2a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 2 Dec 2021 10:43:36 +0100 Subject: [PATCH 060/176] minor: fixed hive action name --- .../eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml index 09930336a..ba5f4f375 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml @@ -292,7 +292,7 @@ yarn cluster - Import table project + Import table relation eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob dhp-graph-mapper-${projectVersion}.jar From 3b19821f3c357f37f91467caad84a6f7cf874c34 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 2 Dec 2021 10:44:10 +0100 Subject: [PATCH 061/176] added stats computation on the graph hive DB tables --- .../graph/hive/oozie_app/lib/scripts/postprocessing.sql | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql index 46e0eb5e1..7eaec2e2c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql @@ -8,3 +8,12 @@ CREATE VIEW IF NOT EXISTS ${hiveDbName}.result as select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.software s union all select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.otherresearchproduct o; + +ANALYZE TABLE ${hiveDbName}.datasource COMPUTE STATISTICS; +ANALYZE TABLE ${hiveDbName}.organization COMPUTE STATISTICS; +ANALYZE TABLE ${hiveDbName}.project COMPUTE STATISTICS; +ANALYZE TABLE ${hiveDbName}.publication COMPUTE STATISTICS; +ANALYZE TABLE ${hiveDbName}.dataset COMPUTE STATISTICS; +ANALYZE TABLE ${hiveDbName}.otherresearchproduct COMPUTE STATISTICS; +ANALYZE TABLE ${hiveDbName}.software COMPUTE STATISTICS; +ANALYZE TABLE ${hiveDbName}.relation COMPUTE STATISTICS; \ No newline at end of file From 87eedad89814d7310e3e07a802ca85b6bfcb13b1 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 2 Dec 2021 13:17:19 +0100 Subject: [PATCH 062/176] - --- dhp-workflows/dhp-aggregation/src/site/markdown/index.md | 2 +- .../dhp-aggregation/src/site/markdown/integration.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/index.md b/dhp-workflows/dhp-aggregation/src/site/markdown/index.md index 240617f91..6c4e05d5f 100644 --- a/dhp-workflows/dhp-aggregation/src/site/markdown/index.md +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/index.md @@ -16,5 +16,5 @@ It defines [mappings](mappings.md) for transformation of different datasource (S ### Integration of external information in the result -The workflows create new entity in the OpenAIRE format (OAF) which aim is to enrich the result already contained in the graph. +The workflows create new entity in the OpenAIRE format (OAF) whose aim is to enrich the result already contained in the graph. See integration section for more insight diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md b/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md index baf232e40..7b763c681 100644 --- a/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md @@ -9,7 +9,7 @@ The information integrated so far is: 1. Article impact measures 1. [Bip!Finder](https://dl.acm.org/doi/10.1145/3357384.3357850) scores 2. Result Subjects - 1. Integration of Fields od Science and Techonology ([FOS](https://www.qnrf.org/en-us/FOS)) classification in + 1. Integration of Fields of Science and Techonology ([FOS](https://www.qnrf.org/en-us/FOS)) classification in results subjects. @@ -18,7 +18,7 @@ and the element in the OAF model that should be used to map the information we w The id is set by using a particular encoding of the given PID -*unresolved:[pid]:[pidtype]* +*unresolved::[pid]::[pidtype]* where @@ -31,6 +31,6 @@ Such entities are matched against those available in the graph using the result. This mechanism can be used to integrate enrichments produced as associated by a given PID. If a match will be found with one of the results already in the graph that said result will be enriched with the information present in the new OAF. -All the objects for which a match is not found are discarded. +All the entities for which a match is not found are discarded. From 58bc3f223ad1e180113fae6909c8656a8b85ee68 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 2 Dec 2021 14:09:46 +0100 Subject: [PATCH 063/176] [GRAPH DUMP] Add filtering for relation we do not want to dump. It is based on the relclass --- .../dump/complete/CreateContextEntities.java | 2 +- .../dump/complete/CreateContextRelation.java | 2 +- .../dump/complete/SparkCollectAndSave.java | 2 +- .../dump/complete/SparkDumpEntitiesJob.java | 2 +- .../dump/complete/SparkDumpRelationJob.java | 17 +- .../complete/SparkOrganizationRelation.java | 2 +- .../SparkSelectValidRelationsJob.java | 2 +- .../community/oozie_app/config-default.xml | 30 - .../dump/community/oozie_app/workflow.xml | 431 ------------- .../complete/oozie_app/config-default.xml | 30 - .../dump/complete/oozie_app/workflow.xml | 586 ------------------ .../community_infrastructure_schema.json | 37 -- .../complete/schema/datasource_schema.json | 192 ------ .../complete/schema/organization_schema.json | 57 -- .../dump/complete/schema/project_schema.json | 119 ---- .../dump/complete/schema/relation_schema.json | 60 -- .../dump/complete/schema/result_schema.json | 398 ------------ .../oozie_app/config-default.xml | 30 - .../dump/funderresults/oozie_app/workflow.xml | 563 ----------------- .../input_collect_and_save.json | 0 ...rs.json => input_complete_parameters.json} | 0 .../input_entity_parameter.json | 0 .../input_organization_parameters.json | 0 .../input_relationdump_parameters.json | 6 + .../community_infrastructure_schema.json | 35 -- .../graph/dump/schemas/datasource_schema.json | 192 ------ .../dump/schemas/organization_schema.json | 57 -- .../oa/graph/dump/schemas/project_schema.json | 119 ---- .../graph/dump/schemas/relation_schema.json | 68 -- .../oa/graph/dump/schemas/result_schema.json | 417 ------------- .../graph/dump/complete/DumpRelationTest.java | 61 +- 31 files changed, 84 insertions(+), 3433 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/community_infrastructure_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/datasource_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/organization_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/project_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/relation_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{complete => }/input_collect_and_save.json (100%) rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{complete/input_parameters.json => input_complete_parameters.json} (100%) rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{complete => }/input_entity_parameter.json (100%) rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{complete => }/input_organization_parameters.json (100%) rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{complete => }/input_relationdump_parameters.json (73%) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java index 120de9327..aa031203d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java @@ -41,7 +41,7 @@ public class CreateContextEntities implements Serializable { .toString( CreateContextEntities.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java index a468e334d..3c71fcaa5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java @@ -48,7 +48,7 @@ public class CreateContextRelation implements Serializable { .requireNonNull( CreateContextRelation.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json"))); + "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java index 671bccd25..e902a02c8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java @@ -31,7 +31,7 @@ public class SparkCollectAndSave implements Serializable { .toString( SparkCollectAndSave.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_collect_and_save.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java index 8b282386f..f239ffca7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java @@ -22,7 +22,7 @@ public class SparkDumpEntitiesJob implements Serializable { .toString( SparkDumpEntitiesJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/wf/input_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java index ddfd6592f..05e3f8835 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java @@ -4,10 +4,11 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; -import java.util.Optional; +import java.util.*; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -37,7 +38,7 @@ public class SparkDumpRelationJob implements Serializable { .toString( SparkDumpRelationJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -54,6 +55,13 @@ public class SparkDumpRelationJob implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + Optional rs = Optional.ofNullable(parser.get("removeSet")); + final Set removeSet = new HashSet<>(); + if(rs.isPresent()){ + Collections.addAll(removeSet, rs.get().split(";")); + } + + SparkConf conf = new SparkConf(); runWithSparkSession( @@ -61,15 +69,16 @@ public class SparkDumpRelationJob implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - dumpRelation(spark, inputPath, outputPath); + dumpRelation(spark, inputPath, outputPath, removeSet); }); } - private static void dumpRelation(SparkSession spark, String inputPath, String outputPath) { + private static void dumpRelation(SparkSession spark, String inputPath, String outputPath, Set removeSet) { Dataset relations = Utils.readPath(spark, inputPath, Relation.class); relations + .filter((FilterFunction)r -> !removeSet.contains(r.getRelClass())) .map((MapFunction) relation -> { eu.dnetlib.dhp.schema.dump.oaf.graph.Relation relNew = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation(); relNew diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java index f9d2123e2..4475232de 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java @@ -39,7 +39,7 @@ public class SparkOrganizationRelation implements Serializable { .toString( SparkOrganizationRelation.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_organization_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java index 20f3fc4a7..d1bca6a7c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java @@ -35,7 +35,7 @@ public class SparkSelectValidRelationsJob implements Serializable { .toString( SparkSelectValidRelationsJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0ae..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml deleted file mode 100644 index fcef2547a..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml +++ /dev/null @@ -1,431 +0,0 @@ - - - - - sourcePath - the source path - - - isLookUpUrl - the isLookup service endpoint - - - outputPath - the output path - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata - the metadata associated to the deposition - - - depositionType - one among {new, update, version} - - - conceptRecordId - for new version, the id of the record for the old deposition - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap - --outputPath${workingDir}/communityMap - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - - - - - - - - yarn - cluster - Dump table publication for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/publication - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table dataset for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/dataset - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table ORP for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/otherresearchproduct - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table software for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/software - --communityMapPath${workingDir}/communityMap - - - - - - - - - - yarn - cluster - Prepare association result subset of project info - eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath} - --outputPath${workingDir}/preparedInfo - - - - - - - - - - - - - - - yarn - cluster - Extend dumped publications with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/publication - --outputPath${workingDir}/ext/publication - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped dataset with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dataset - --outputPath${workingDir}/ext/dataset - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped ORP with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/otherresearchproduct - --outputPath${workingDir}/ext/orp - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped software with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/software - --outputPath${workingDir}/ext/software - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - - - yarn - cluster - Split dumped result for community - eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/ext - --outputPath${workingDir}/split - --communityMapPath${workingDir}/communityMap - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/split - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --communityMapPath${workingDir}/communityMap - --conceptRecordId${conceptRecordId} - --depositionId${depositionId} - --depositionType${depositionType} - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0ae..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml deleted file mode 100644 index 8189e2594..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml +++ /dev/null @@ -1,586 +0,0 @@ - - - - - sourcePath - the source path - - - isLookUpUrl - the isLookup service endpoint - - - outputPath - the output path - - - resultAggregation - true if all the result type have to be dumped under result. false otherwise - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata - the metadata associated to the deposition - - - depositionType - the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) - - - conceptRecordId - for new version, the id of the record for the old deposition - - - depositionId - the depositionId of a deposition open that has to be added content - - - organizationCommunityMap - the organization community map - - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap - --outputPath${workingDir}/communityMap - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - - - - - - - - - - - - yarn - cluster - Dump table publication - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/result/publication - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table dataset - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/result/dataset - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table ORP - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/result/otherresearchproduct - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table software - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/result/software - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table organization - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/organization - --resultTableNameeu.dnetlib.dhp.schema.oaf.Organization - --outputPath${workingDir}/collect/organization - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table project - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/project - --resultTableNameeu.dnetlib.dhp.schema.oaf.Project - --outputPath${workingDir}/collect/project - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table datasource - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/datasource - --resultTableNameeu.dnetlib.dhp.schema.oaf.Datasource - --outputPath${workingDir}/collect/datasource - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table relation - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpRelationJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/relation - --outputPath${workingDir}/relation/relation - - - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities - --hdfsPath${workingDir}/collect/communities_infrastructures/communities_infrastructure.json.gz - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation - --hdfsPath${workingDir}/relation/context - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - yarn - cluster - Dump table relation - eu.dnetlib.dhp.oa.graph.dump.complete.SparkOrganizationRelation - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/relation - --outputPath${workingDir}/relation/contextOrg - --organizationCommunityMap${organizationCommunityMap} - --communityMapPath${workingDir}/communityMap - - - - - - - - - - - - - - - - - yarn - cluster - Extract Relations from publication - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/relation/publication - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table dataset - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/relation/dataset - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table ORP - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/relation/orp - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table software - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/relation/software - --communityMapPath${workingDir}/communityMap - - - - - - - - - - yarn - cluster - Collect Results and Relations and put them in the right path - eu.dnetlib.dhp.oa.graph.dump.complete.SparkCollectAndSave - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir} - --outputPath${workingDir}/collect - --resultAggregation${resultAggregation} - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/collect - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --communityMapPath${workingDir}/communityMap - --conceptRecordId${conceptRecordId} - --depositionType${depositionType} - --depositionId${depositionId} - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/community_infrastructure_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/community_infrastructure_schema.json deleted file mode 100644 index d2f179212..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/community_infrastructure_schema.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "description": { - "type": "string", - "description": "Description of the research community/research infrastructure" - }, - "id": { - "type": "string", - "description": "OpenAIRE id of the research community/research infrastructure" - }, - "name": { - "type": "string", - "description": "The long name of the community" - }, - "originalId": { - "type": "string", - "description": "The acronym of the community" - }, - "subject": { - "description": "Only for research communities: the list of the subjects associated to the research community", - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "description": "One of {Research Community, Research infrastructure}" - }, - "zenodo_community": { - "type": "string", - "description": "The URL of the Zenodo community associated to the Research community/Research infrastructure" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/datasource_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/datasource_schema.json deleted file mode 100644 index b9c15d921..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/datasource_schema.json +++ /dev/null @@ -1,192 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "definitions": { - "ControlledField": { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" - } - }, - "type": "object", - "properties": { - "accessrights": { - "type": "string", - "description": "Type of access to the data source, as defined by re3data.org. Possible values: {open, restricted, closed}" - }, - "certificates": { - "type": "string", - "description": "The certificate, seal or standard the data source complies with. As defined by re3data.org." - }, - "citationguidelineurl": { - "type": "string", - "description":"The URL of the data source providing information on how to cite its items. As defined by re3data.org." - }, - "contenttypes": { - "description": "Types of content in the data source, as defined by OpenDOAR", - "type": "array", - "items": { - "type": "string" - } - }, - "databaseaccessrestriction": { - "type": "string", - "description": "Access restrinctions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}" - }, - "datasourcetype": { - "allOf": [ - { - "$ref": "#/definitions/ControlledField" - }, - { - "description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies" - } - ] - }, - "datauploadrestriction": { - "type": "string", - "description": "Upload restrictions applied by the datasource, as defined by re3data.org. One of {feeRequired, registration, other}" - }, - "dateofvalidation": { - "type": "string", - "description": "The date of last validation against the OpenAIRE guidelines for the datasource records" - }, - "description": { - "type": "string" - }, - "englishname": { - "type": "string", - "description": "The English name of the datasource" - }, - "id": { - "type": "string", - "description": "The OpenAIRE id of the data source" - }, - "journal": { - "type": "object", - "properties": { - "conferencedate": { - "type": "string" - }, - "conferenceplace": { - "type": "string" - }, - "edition": { - "type": "string" - }, - "ep": { - "type": "string", - "description": "End page" - }, - "iss": { - "type": "string", - "description": "Issue number" - }, - "issnLinking": { - "type": "string" - }, - "issnOnline": { - "type": "string" - }, - "issnPrinted": { - "type": "string" - }, - "name": { - "type": "string" - }, - "sp": { - "type": "string", - "description": "Start page" - }, - "vol": { - "type": "string", - "description": "Volume" - } - }, - "description": "Information about the journal, if this data source is of type Journal." - }, - "languages": { - "description": "The languages present in the data source's content, as defined by OpenDOAR.", - "type": "array", - "items": { - "type": "string" - } - }, - "logourl": { - "type": "string" - }, - "missionstatementurl": { - "type": "string", - "description":"The URL of a mission statement describing the designated community of the data source. As defined by re3data.org" - }, - "officialname": { - "type": "string", - "description": "The official name of the datasource" - }, - "openairecompatibility": { - "type": "string", - "description": "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu." - }, - "originalId": { - "description": "Original identifiers for the datasource" - "type": "array", - "items": { - "type": "string" - } - }, - "pid": { - "description": "Persistent identifiers of the datasource", - "type": "array", - "items": { - "allOf": [ - { - "$ref": "#/definitions/ControlledField" - } - ] - } - }, - "pidsystems": { - "type": "string", - "description": "The persistent identifier system that is used by the data source. As defined by re3data.org" - }, - "policies": { - "description": "Policies of the data source, as defined in OpenDOAR.", - "type": "array", - "items": { - "type": "string" - } - }, - "releaseenddate": { - "type": "string", - "description": "Date when the data source went offline or stopped ingesting new research data. As defined by re3data.org" - }, - "releasestartdate": { - "type": "string", - "description": "Releasing date of the data source, as defined by re3data.org" - }, - "subjects": { - "description": "List of subjects associated to the datasource", - "type": "array", - "items": { - "type": "string" - } - }, - "uploadrights": { - "type": "string", - "description": "Type of data upload. As defined by re3data.org: one of {open, restricted,closed}" - }, - "versioning": { - "type": "boolean", - "description": "As defined by redata.org: 'yes' if the data source supports versioning, 'no' otherwise." - }, - "websiteurl": { - "type": "string" - } - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/organization_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/organization_schema.json deleted file mode 100644 index 16afa386d..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/organization_schema.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "alternativenames": { - "description": "Alternative names that identify the organisation", - "type": "array", - "items": { - "type": "string" - } - }, - "country": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "The organisation country code" - }, - "label": { - "type": "string", - "description": "The organisation country label" - } - }, - "description": "The country of the organisation" - }, - "id": { - "type": "string", - "description": "The OpenAIRE id for the organisation" - }, - "legalname": { - "type": "string" - }, - "legalshortname": { - "type": "string" - }, - "pid": { - "description": "Persistent identifiers for the organisation i.e. isni 0000000090326370", - "type": "array", - "items": { - "type": "object", - "properties": { - "scheme": { - "type": "string", - "description": "The scheme of the identifier (i.e. isni)" - }, - "value": { - "type": "string", - "description": "the value in the schema (i.e. 0000000090326370)" - } - } - } - }, - "websiteurl": { - "type": "string" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/project_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/project_schema.json deleted file mode 100644 index c81187258..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/project_schema.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "acronym": { - "type": "string" - }, - "callidentifier": { - "type": "string" - }, - "code": { - "type": "string", - "description": "The grant agreement number" - }, - "enddate": { - "type": "string" - }, - "funding": { - "description": "Funding information for the project", - "type": "array", - "items": { - "type": "object", - "properties": { - "funding_stream": { - "type": "object", - "properties": { - "description": { - "type": "string", - "description": "Description of the funding stream" - }, - "id": { - "type": "string", - "description": "Id of the funding stream" - } - } - }, - "jurisdiction": { - "type": "string", - "description": "The jurisdiction of the funder (i.e. EU)" - }, - "name": { - "type": "string", - "description": "The name of the funder (European Commission)" - }, - "shortName": { - "type": "string", - "description": "The short name of the funder (EC)" - } - } - } - }, - "granted": { - "type": "object", - "properties": { - "currency": { - "type": "string", - "description": "The currency of the granted amount (e.g. EUR)" - }, - "fundedamount": { - "type": "number", - "description": "The funded amount" - }, - "totalcost": { - "type": "number", - "description": "The total cost of the project" - } - }, - "description": "The money granted to the project" - }, - "h2020programme": { - "description": "The h2020 programme funding the project", - "type": "array", - "items": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "The code of the programme" - }, - "description": { - "type": "string", - "description": "The description of the programme" - } - } - } - }, - "id": { - "type": "string", - "description": "OpenAIRE id for the project" - }, - "keywords": { - "type": "string" - }, - "openaccessmandatefordataset": { - "type": "boolean" - }, - "openaccessmandateforpublications": { - "type": "boolean" - }, - "startdate": { - "type": "string" - }, - "subject": { - "type": "array", - "items": { - "type": "string" - } - }, - "summary": { - "type": "string" - }, - "title": { - "type": "string" - }, - "websiteurl": { - "type": "string" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/relation_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/relation_schema.json deleted file mode 100644 index 7c7de9c98..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/relation_schema.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "definitions": { - "Node": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The OpenAIRE id of the entity" - }, - "type": { - "type": "string", - "description": "The type of the entity (i.e. organisation)" - } - } - } - }, - "type": "object", - "properties": { - "provenance": { - "type": "object", - "properties": { - "provenance": { - "type": "string", - "description": "The reason why OpenAIRE holds the relation " - }, - "trust": { - "type": "string", - "description": "The trust of the relation in the range of [0,1]. Where greater the number, more the trust. Harvested relationships have typically a high trust (0.9). The trust of inferred relationship is calculated by the inference algorithm that generated them, as described in https://graph.openaire.eu/about#architecture (Enrichment --> Mining)" - } - } - }, - "reltype": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The semantics of the relation (i.e. isAuthorInstitutionOf). " - }, - "type": { - "type": "string", - "description": "the type of the relation (i.e. affiliation)" - } - }, - "description": "To represent the semantics of a relation between two entities" - }, - "source": { - "allOf": [ - {"$ref": "#/definitions/Node"}, - {"description": "The node source in the relation"} - ] - }, - "target": { - "allOf": [ - {"$ref": "#/definitions/Node"}, - {"description": "The node target in the relation"} - ] - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json deleted file mode 100644 index 03cbfb074..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json +++ /dev/null @@ -1,398 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "definitions": { - "AccessRight":{ - "type":"object", - "properties":{ - "code": { - "type": "string", - "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label": { - "type": "string", - "description": "Label for the access mode" - }, - "scheme": { - "type": "string", - "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - } - }, - "ControlledField": { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" - }, - "Provenance": { - "type": "object", - "properties": { - "provenance": { - "type": "string", - "description": "The process that produced/provided the information" - }, - "trust": { - "type": "string" - } - }, - "description": "Indicates the process that produced (or provided) the information, and the trust associated to the information" - } - }, - "type": "object", - "properties": { - "author": { - "type": "array", - "items": { - "type": "object", - "properties": { - "fullname": { - "type": "string" - }, - "name": { - "type": "string" - }, - "pid": { - "type": "object", - "properties": { - "id": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "The author's id and scheme. OpenAIRE currently supports 'ORCID'"} - ] - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Provenance of author's pid"} - ] - } - } - }, - "rank": { - "type": "integer" - }, - "surname": { - "type": "string" - } - } - } - }, - "bestaccessright": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label": { - "type": "string", - "description": "Label for the access mode" - }, - "scheme": { - "type": "string", - "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - }, - "description": "The openest access right associated to the manifestations of this research results" - }, - "codeRepositoryUrl": { - "type": "string", - "description": "Only for results with type 'software': the URL to the repository with the source code" - }, - "contactgroup": { - "description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", - "type": "array", - "items": { - "type": "string" - } - }, - "contactperson": { - "description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource", - "type": "array", - "items": { - "type": "string" - } - }, - "container": { - "type": "object", - "properties": { - "conferencedate": { - "type": "string" - }, - "conferenceplace": { - "type": "string" - }, - "edition": { - "type": "string", - "description": "Edition of the journal or conference proceeding" - }, - "ep": { - "type": "string", - "description": "End page" - }, - "iss": { - "type": "string", - "description": "Journal issue" - }, - "issnLinking": { - "type": "string" - }, - "issnOnline": { - "type": "string" - }, - "issnPrinted": { - "type": "string" - }, - "name": { - "type": "string", - "description": "Name of the journal or conference" - }, - "sp": { - "type": "string", - "description": "start page" - }, - "vol": { - "type": "string" - } - }, - "description": "Container has information about the conference or journal where the result has been presented or published" - }, - "contributor": { - "type": "array", - "items": { - "type": "string", - "description": "Description of contributor" - } - }, - "country": { - "type": "array", - "items": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "ISO 3166-1 alpha-2 country code" - }, - "label": { - "type": "string" - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this result is associated to the country."} - ] - } - } - } - }, - "coverage": { - "type": "array", - "items": { - "type": "string" - } - }, - "dateofcollection": { - "type": "string", - "description": "When OpenAIRE collected the record the last time" - }, - "description": { - "type": "array", - "items": { - "type": "string" - } - }, - "documentationUrl": { - "description": "Only for results with type 'software': URL to the software documentation", - "type": "array", - "items": { - "type": "string" - } - }, - "embargoenddate": { - "type": "string", - "description": "Date when the embargo ends and this result turns Open Access" - }, - "format": { - "type": "array", - "items": { - "type": "string" - } - }, - "geolocation": { - "description": "Geolocation information", - "type": "array", - "items": { - "type": "object", - "properties": { - "box": { - "type": "string" - }, - "place": { - "type": "string" - }, - "point": { - "type": "string" - } - } - } - }, - "id": { - "type": "string", - "description": "OpenAIRE Identifier" - }, - "language": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "alpha-3/ISO 639-2 code of the language" - }, - "label": { - "type": "string", - "description": "English label" - } - } - }, - "lastupdatetimestamp": { - "type": "integer", - "description": "Timestamp of last update of the record in OpenAIRE" - }, - "maintitle": { - "type": "string" - }, - "originalId": { - "description": "Identifiers of the record at the original sources", - "type": "array", - "items": { - "type": "string" - } - }, - "pid": { - "description": "Persistent identifiers of the result", - "type": "array", - "items": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result "} - ] - } - }, - "instance":{ - "type":"array", - "items":{ - "type":"object", - "properties":{ - "accessright":{ - "allOf":[ - { - "$ref":"#/definitions/AccessRight" - }, - { - "description":"The accessright of this materialization of the result" - } - ] - }, - "articleprocessingcharge":{ - "type":"object", - "properties":{ - "amount":{ - "type":"string" - }, - "currency":{ - "type":"string" - } - } - }, - "license":{ - "type":"string" - }, - "publicationdate":{ - "type":"string" - }, - "refereed":{ - "type":"string" - }, - "type":{ - "type":"string", - "description":"The specific sub-type of this materialization of the result (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" - }, - "url":{ - "description":"Description of url", - "type":"array", - "items":{ - "type":"string", - "description":"urls where it is possible to access the materialization of the result" - } - } - }, - "description":"One of the materialization for this result" - } - }, - "programmingLanguage": { - "type": "string", - "description": "Only for results with type 'software': the programming language" - }, - "publicationdate": { - "type": "string" - }, - "publisher": { - "type": "string" - }, - "size": { - "type": "string", - "description": "Only for results with type 'dataset': the declared size of the dataset" - }, - "source": { - "description": "See definition of Dublin Core field dc:source", - "type": "array", - "items": { - "type": "string" - } - }, - "subjects": { - "description": "Keywords associated to the result", - "type": "array", - "items": { - "type": "object", - "properties": { - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this subject is associated to the result"} - ] - }, - "subject": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."} - ] - } - } - } - }, - "subtitle": { - "type": "string" - }, - "tool": { - "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)" - }, - "version": { - "type": "string", - "description": "Version of the result" - } - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0ae..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml deleted file mode 100644 index 650b972fa..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml +++ /dev/null @@ -1,563 +0,0 @@ - - - - - upload - false - true to upload the dump for the funders in Zenodo - - - sourcePath - the source path - - - isLookUpUrl - the isLookup service endpoint - - - outputPath - the output path - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata - the metadata associated to the deposition - - - depositionType - the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) - - - conceptRecordId - for new version, the id of the record for the old deposition - - - depositionId - the depositionId of a deposition open that has to be added content - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap - --outputPath${workingDir}/communityMap - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/result/publication - --relationPath${sourcePath}/relation - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/result/dataset - --relationPath${sourcePath}/relation - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/result/otherresearchproduct - --relationPath${sourcePath}/relation - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/result/software - --relationPath${sourcePath}/relation - - - - - - - - - - - - - - - - - yarn - cluster - Dump table publication for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/result/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/dump/publication - --communityMapPath${workingDir}/communityMap - --dumpTypefunder - - - - - - - - yarn - cluster - Dump table dataset for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/result/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/dump/dataset - --communityMapPath${workingDir}/communityMap - --dumpTypefunder - - - - - - - - yarn - cluster - Dump table ORP for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/result/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/dump/otherresearchproduct - --communityMapPath${workingDir}/communityMap - --dumpTypefunder - - - - - - - - yarn - cluster - Dump table software for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/result/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/dump/software - --communityMapPath${workingDir}/communityMap - --dumpTypefunder - - - - - - - - - - yarn - cluster - Prepare association result subset of project info - eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath} - --outputPath${workingDir}/preparedInfo - - - - - - - - - - - - - - - yarn - cluster - Extend dumped publications with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/publication - --outputPath${workingDir}/ext/publication - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped dataset with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/dataset - --outputPath${workingDir}/ext/dataset - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped ORP with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/otherresearchproduct - --outputPath${workingDir}/ext/orp - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped software with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/software - --outputPath${workingDir}/ext/software - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/ext - --outputPath${workingDir}/resultperfunder - --relationPath${sourcePath} - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/resultperfunder - - - - - - - - ${wf:conf('upload') eq true} - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --communityMapPath${workingDir}/communityMap - --conceptRecordId${conceptRecordId} - --depositionType${depositionType} - --depositionId${depositionId} - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_collect_and_save.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_collect_and_save.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_complete_parameters.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_parameters.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_complete_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_organization_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_organization_parameters.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json similarity index 73% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json index 2bfcac3bc..5c26ea7d1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json @@ -19,6 +19,12 @@ "paramLongName": "isSparkSessionManaged", "paramDescription": "true if the spark session is managed, false otherwise", "paramRequired": false + }, + { + "paramName": "rs", + "paramLongName": "removeSet", + "paramDescription": "the list of classname relations, split by ';', not to be dumped", + "paramRequired": false } ] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json deleted file mode 100644 index 727432a67..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "acronym": { - "type": "string", - "description": "The acronym of the community" - }, - "description": { - "type": "string", - "description": "Description of the research community/research infrastructure" - }, - "id": { - "type": "string", - "description": "OpenAIRE id of the research community/research infrastructure" - }, - "name": { - "type": "string", - "description": "The long name of the community" - }, - "subject": { - "description": "Only for research communities: the list of the subjects associated to the research community", - "type": "array", - "items": {"type": "string"} - }, - "type": { - "type": "string", - "description": "One of {Research Community, Research infrastructure}" - }, - "zenodo_community": { - "type": "string", - "description": "The URL of the Zenodo community associated to the Research community/Research infrastructure" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json deleted file mode 100644 index c416f2320..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json +++ /dev/null @@ -1,192 +0,0 @@ -{ - "$schema":"http://json-schema.org/draft-07/schema#", - "definitions": { - "ControlledField": { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" - } - }, - "type":"object", - "properties": { - "accessrights": { - "type": "string", - "description": "Type of access to the data source, as defined by re3data.org. Possible values: {open, restricted, closed}" - }, - "certificates": { - "type": "string", - "description": "The certificate, seal or standard the data source complies with. As defined by re3data.org." - }, - "citationguidelineurl": { - "type": "string", - "description":"The URL of the data source providing information on how to cite its items. As defined by re3data.org." - }, - "contenttypes": { - "description": "Types of content in the data source, as defined by OpenDOAR", - "type": "array", - "items": { - "type": "string" - } - }, - "databaseaccessrestriction": { - "type": "string", - "description": "Access restrinctions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}" - }, - "datasourcetype": { - "allOf": [ - { - "$ref": "#/definitions/ControlledField" - }, - { - "description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies" - } - ] - }, - "datauploadrestriction": { - "type": "string", - "description": "Upload restrictions applied by the datasource, as defined by re3data.org. One of {feeRequired, registration, other}" - }, - "dateofvalidation": { - "type": "string", - "description": "The date of last validation against the OpenAIRE guidelines for the datasource records" - }, - "description": { - "type": "string" - }, - "englishname": { - "type": "string", - "description": "The English name of the datasource" - }, - "id": { - "type": "string", - "description": "The OpenAIRE id of the data source" - }, - "journal": { - "type": "object", - "properties": { - "conferencedate": { - "type": "string" - }, - "conferenceplace": { - "type": "string" - }, - "edition": { - "type": "string" - }, - "ep": { - "type": "string", - "description": "End page" - }, - "iss": { - "type": "string", - "description": "Issue number" - }, - "issnLinking": { - "type": "string" - }, - "issnOnline": { - "type": "string" - }, - "issnPrinted": { - "type": "string" - }, - "name": { - "type": "string" - }, - "sp": { - "type": "string", - "description": "Start page" - }, - "vol": { - "type": "string", - "description": "Volume" - } - }, - "description": "Information about the journal, if this data source is of type Journal." - }, - "languages": { - "description": "The languages present in the data source's content, as defined by OpenDOAR.", - "type": "array", - "items": { - "type": "string" - } - }, - "logourl": { - "type": "string" - }, - "missionstatementurl": { - "type": "string", - "description":"The URL of a mission statement describing the designated community of the data source. As defined by re3data.org" - }, - "officialname": { - "type": "string", - "description": "The official name of the datasource" - }, - "openairecompatibility": { - "type": "string", - "description": "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu." - }, - "originalId": { - "description": "Original identifiers for the datasource" - "type": "array", - "items": { - "type": "string" - } - }, - "pid": { - "description": "Persistent identifiers of the datasource", - "type": "array", - "items": { - "allOf": [ - { - "$ref": "#/definitions/ControlledField" - } - ] - } - }, - "pidsystems": { - "type": "string", - "description": "The persistent identifier system that is used by the data source. As defined by re3data.org" - }, - "policies": { - "description": "Policies of the data source, as defined in OpenDOAR.", - "type": "array", - "items": { - "type": "string" - } - }, - "releaseenddate": { - "type": "string", - "description": "Date when the data source went offline or stopped ingesting new research data. As defined by re3data.org" - }, - "releasestartdate": { - "type": "string", - "description": "Releasing date of the data source, as defined by re3data.org" - }, - "subjects": { - "description": "List of subjects associated to the datasource", - "type": "array", - "items": { - "type": "string" - } - }, - "uploadrights": { - "type": "string", - "description": "Type of data upload. As defined by re3data.org: one of {open, restricted,closed}" - }, - "versioning": { - "type": "boolean", - "description": "As defined by redata.org: 'yes' if the data source supports versioning, 'no' otherwise." - }, - "websiteurl": { - "type": "string" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json deleted file mode 100644 index 16afa386d..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "alternativenames": { - "description": "Alternative names that identify the organisation", - "type": "array", - "items": { - "type": "string" - } - }, - "country": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "The organisation country code" - }, - "label": { - "type": "string", - "description": "The organisation country label" - } - }, - "description": "The country of the organisation" - }, - "id": { - "type": "string", - "description": "The OpenAIRE id for the organisation" - }, - "legalname": { - "type": "string" - }, - "legalshortname": { - "type": "string" - }, - "pid": { - "description": "Persistent identifiers for the organisation i.e. isni 0000000090326370", - "type": "array", - "items": { - "type": "object", - "properties": { - "scheme": { - "type": "string", - "description": "The scheme of the identifier (i.e. isni)" - }, - "value": { - "type": "string", - "description": "the value in the schema (i.e. 0000000090326370)" - } - } - } - }, - "websiteurl": { - "type": "string" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json deleted file mode 100644 index c81187258..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "acronym": { - "type": "string" - }, - "callidentifier": { - "type": "string" - }, - "code": { - "type": "string", - "description": "The grant agreement number" - }, - "enddate": { - "type": "string" - }, - "funding": { - "description": "Funding information for the project", - "type": "array", - "items": { - "type": "object", - "properties": { - "funding_stream": { - "type": "object", - "properties": { - "description": { - "type": "string", - "description": "Description of the funding stream" - }, - "id": { - "type": "string", - "description": "Id of the funding stream" - } - } - }, - "jurisdiction": { - "type": "string", - "description": "The jurisdiction of the funder (i.e. EU)" - }, - "name": { - "type": "string", - "description": "The name of the funder (European Commission)" - }, - "shortName": { - "type": "string", - "description": "The short name of the funder (EC)" - } - } - } - }, - "granted": { - "type": "object", - "properties": { - "currency": { - "type": "string", - "description": "The currency of the granted amount (e.g. EUR)" - }, - "fundedamount": { - "type": "number", - "description": "The funded amount" - }, - "totalcost": { - "type": "number", - "description": "The total cost of the project" - } - }, - "description": "The money granted to the project" - }, - "h2020programme": { - "description": "The h2020 programme funding the project", - "type": "array", - "items": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "The code of the programme" - }, - "description": { - "type": "string", - "description": "The description of the programme" - } - } - } - }, - "id": { - "type": "string", - "description": "OpenAIRE id for the project" - }, - "keywords": { - "type": "string" - }, - "openaccessmandatefordataset": { - "type": "boolean" - }, - "openaccessmandateforpublications": { - "type": "boolean" - }, - "startdate": { - "type": "string" - }, - "subject": { - "type": "array", - "items": { - "type": "string" - } - }, - "summary": { - "type": "string" - }, - "title": { - "type": "string" - }, - "websiteurl": { - "type": "string" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json deleted file mode 100644 index 98134a03b..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "$schema":"http://json-schema.org/draft-07/schema#", - "definitions": { - "Node": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The OpenAIRE id of the entity" - }, - "type": { - "type": "string", - "description": "The type of the entity (i.e. organisation)" - } - } - } - }, - "type":"object", - "properties": { - "provenance": { - "type": "object", - "properties": { - "provenance": { - "type": "string", - "description": "The reason why OpenAIRE holds the relation " - }, - "trust": { - "type": "string", - "description": "The trust of the relation in the range of [0,1]. Where greater the number, more the trust. Harvested relationships have typically a high trust (0.9). The trust of inferred relationship is calculated by the inference algorithm that generated them, as described in https://graph.openaire.eu/about#architecture (Enrichment --> Mining)" - } - } - }, - "reltype": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The semantics of the relation (i.e. isAuthorInstitutionOf). " - }, - "type": { - "type": "string", - "description": "the type of the relation (i.e. affiliation)" - } - }, - "description": "To represent the semantics of a relation between two entities" - }, - "source": { - "allOf": [ - {"$ref": "#/definitions/Node"}, - {"description": "The node source in the relation"} - ] - }, - "target": { - "allOf": [ - {"$ref": "#/definitions/Node"}, - {"description": "The node target in the relation"} - ] - }, - "validated":{ - "type":"boolean", - "description":"True if the relation is related to a project and it has been collected from an authoritative source (i.e. the funder)" - }, - "validationDate":{ - "type":"string", - "description":"The date when the relation was collected from OpenAIRE" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json deleted file mode 100644 index a1e09525e..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json +++ /dev/null @@ -1,417 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "definitions": { - "ControlledField": { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" - }, - "Provenance": { - "type": "object", - "properties": { - "provenance": { - "type": "string", - "description": "The process that produced/provided the information" - }, - "trust": { - "type": "string" - } - }, - "description": "Indicates the process that produced (or provided) the information, and the trust associated to the information" - } - }, - "type": "object", - "properties": { - "author": { - "type": "array", - "items": { - "type": "object", - "properties": { - "fullname": { - "type": "string" - }, - "name": { - "type": "string" - }, - "pid": { - "type": "object", - "properties": { - "id": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "The author's id and scheme. OpenAIRE currently supports 'ORCID'"} - ] - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Provenance of author's pid"} - ] - } - } - }, - "rank": { - "type": "integer" - }, - "surname": { - "type": "string" - } - } - } - }, - "bestaccessright":{ - "type":"object", - "properties":{ - "code": { - "type": "string", - "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label": { - "type": "string", - "description": "Label for the access mode" - }, - "scheme": { - "type": "string", - "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - } - }, - "codeRepositoryUrl": { - "type": "string", - "description": "Only for results with type 'software': the URL to the repository with the source code" - }, - "contactgroup": { - "description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", - "type": "array", - "items": { - "type": "string" - } - }, - "contactperson": { - "description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource", - "type": "array", - "items": { - "type": "string" - } - }, - "container": { - "type": "object", - "properties": { - "conferencedate": { - "type": "string" - }, - "conferenceplace": { - "type": "string" - }, - "edition": { - "type": "string", - "description": "Edition of the journal or conference proceeding" - }, - "ep": { - "type": "string", - "description": "End page" - }, - "iss": { - "type": "string", - "description": "Journal issue" - }, - "issnLinking": { - "type": "string" - }, - "issnOnline": { - "type": "string" - }, - "issnPrinted": { - "type": "string" - }, - "name": { - "type": "string", - "description": "Name of the journal or conference" - }, - "sp": { - "type": "string", - "description": "start page" - }, - "vol": { - "type": "string" - } - }, - "description": "Container has information about the conference or journal where the result has been presented or published" - }, - "contributor": { - "type": "array", - "items": { - "type": "string", - "description": "Contributors for the result" - } - }, - "country": { - "type": "array", - "items": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "ISO 3166-1 alpha-2 country code" - }, - "label": { - "type": "string" - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this result is associated to the country."} - ] - } - } - } - }, - "coverage": { - "type": "array", - "items": { - "type": "string" - } - }, - "dateofcollection": { - "type": "string", - "description": "When OpenAIRE collected the record the last time" - }, - "description": { - "type": "array", - "items": { - "type": "string" - } - }, - "documentationUrl": { - "description": "Only for results with type 'software': URL to the software documentation", - "type": "array", - "items": { - "type": "string" - } - }, - "embargoenddate": { - "type": "string", - "description": "Date when the embargo ends and this result turns Open Access" - }, - "format": { - "type": "array", - "items": { - "type": "string" - } - }, - "geolocation": { - "description": "Geolocation information", - "type": "array", - "items": { - "type": "object", - "properties": { - "box": { - "type": "string" - }, - "place": { - "type": "string" - }, - "point": { - "type": "string" - } - } - } - }, - "id": { - "type": "string", - "description": "OpenAIRE Identifier" - }, - "instance":{ - "description":"Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version", - "type":"array", - "items":{ - "type":"object", - "properties":{ - "accessright":{ - "type":"object", - "properties":{ - "code": { - "type": "string", - "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label": { - "type": "string", - "description": "Label for the access mode" - }, - "openAccessRoute":{ - "type":"string", - "enum":[ - "gold", - "green", - "hybrid", - "bronze" - ], - "description":"The type of OpenAccess applied to the result" - }, - "scheme": { - "type": "string", - "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - } - }, - "articleprocessingcharge":{ - "description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative.", - "type":"object", - "properties":{ - "amount":{ - "type":"string" - }, - "currency":{ - "type":"string" - } - } - }, - "license":{ - "type":"string" - }, - "pid":{ - "description":"The set of persistent identifiers associated to this instance that have been collected from an authority for the pid type (i.e. Crossref/Datacite for doi)", - "type":"array", - "items":{ - "allOf":[ - { - "$ref":"#/definitions/ControlledField" - }, - { - "description":"The persistent identifier associated to the result" - } - ] - } - }, - "publicationdate":{ - "type":"string", - "description": "Date of the research product" - }, - "refereed":{ - "description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)", - "type":"string" - }, - "type":{ - "type":"string", - "description":"The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" - }, - "url":{ - "description":"URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ", - "type":"array", - "items":{ - "type":"string" - } - } - } - } - }, - "language": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "alpha-3/ISO 639-2 code of the language" - }, - "label": { - "type": "string", - "description": "Language label in English" - } - } - }, - "lastupdatetimestamp": { - "type": "integer", - "description": "Timestamp of last update of the record in OpenAIRE" - }, - "maintitle": { - "type": "string", - "descriptio": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software." - }, - "subtitle": { - "type": "string", - "descriptio": "Explanatory or alternative name by which a scientific result is known." - }, - "originalId": { - "description": "Identifiers of the record at the original sources", - "type": "array", - "items": { - "type": "string" - } - }, - "pid": { - "description": "Persistent identifiers of the result", - "type": "array", - "items": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result. Note: the result will have a pid associated only if it was collected from an authority for that pid type. For example a doi will be among the pids for one result if the result metadata were collected from Crossref or Datacite. In all the other cases, the doi will be present among the alteranteIdentifiers for the result "} - ] - } - }, - "programmingLanguage": { - "type": "string", - "description": "Only for results with type 'software': the programming language" - }, - "publicationdate": { - "type": "string", - "description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it’s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date." - }, - "publisher": { - "type": "string", - "description": "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource." - }, - "size": { - "type": "string", - "description": "Only for results with type 'dataset': the declared size of the dataset" - }, - "source": { - "description": "See definition of Dublin Core field dc:source", - "type": "array", - "items": { - "type": "string" - } - }, - "subjects": { - "description": "Keywords associated to the result", - "type": "array", - "items": { - "type": "object", - "properties": { - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this subject is associated to the result"} - ] - }, - "subject": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."} - ] - } - } - } - }, - "tool": { - "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)" - }, - "version": { - "type": "string", - "description": "Version of the result" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java index fe178795d..3dbf2b09d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java @@ -4,8 +4,10 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; + import java.util.HashMap; + import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -81,7 +83,6 @@ public class DumpRelationTest { "-sourcePath", sourcePath }); -// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset"); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -144,7 +145,6 @@ public class DumpRelationTest { "-sourcePath", sourcePath }); -// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset"); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -203,4 +203,61 @@ public class DumpRelationTest { "and validationDate = '2021-08-06'") .count()); } + + @Test + public void test3() throws Exception {// + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") + .getPath(); + + SparkDumpRelationJob.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", sourcePath, + "-removeSet", "isParticipant" + }); + + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + verificationDataset.createOrReplaceTempView("table"); + + verificationDataset + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + + Dataset check = spark + .sql( + "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " + + + "from table "); + + Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count()); + Assertions + .assertEquals( + 22, check + .filter( + "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + + "provenance = 'Harvested'") + .count()); + + Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count()); + + + Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count()); + Assertions + .assertEquals( + 1, check + .filter( + "name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " + + "and provenance = 'Inferred by OpenAIRE'") + .count()); + } + } From d9f80488cc471bcb28b294de523e7ccac9bd571b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 2 Dec 2021 14:15:19 +0100 Subject: [PATCH 064/176] [GRAPH DUMP] Add one more test to check the filtering of the relations --- .../graph/dump/complete/DumpRelationTest.java | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java index 3dbf2b09d..8a2eb585e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java @@ -260,4 +260,54 @@ public class DumpRelationTest { .count()); } + @Test + public void test4() throws Exception {// + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") + .getPath(); + + SparkDumpRelationJob.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", sourcePath, + "-removeSet", "isParticipant;isAuthorInstitutionOf" + }); + + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + verificationDataset.createOrReplaceTempView("table"); + + verificationDataset + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + + Dataset check = spark + .sql( + "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " + + + "from table "); + + Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count()); + Assertions + .assertEquals( + 22, check + .filter( + "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + + "provenance = 'Harvested'") + .count()); + + Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count()); + + + Assertions.assertEquals(0, check.filter("name = 'isAuthorInstitutionOf'").count()); + + } + } From 9cac283bec9abc1394c6866c8c164a773af1a52f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 2 Dec 2021 17:20:33 +0100 Subject: [PATCH 065/176] implemented Instance serialization features requested in https://support.openaire.eu/issues/7156 --- .../dhp/oa/provision/model/XmlInstance.java | 158 ++++ .../oa/provision/utils/TemplateFactory.java | 13 +- .../oa/provision/utils/XmlRecordFactory.java | 193 ++++- .../dhp/oa/provision/template/instance.st | 2 +- .../oa/provision/XmlRecordFactoryTest.java | 22 +- .../dnetlib/dhp/oa/provision/publication.json | 793 +++++++++++++++++- 6 files changed, 1121 insertions(+), 60 deletions(-) create mode 100644 dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java new file mode 100644 index 000000000..a38329750 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java @@ -0,0 +1,158 @@ + +package eu.dnetlib.dhp.oa.provision.model; + +import java.util.Set; + +import com.google.common.collect.Sets; + +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import scala.Serializable; + +public class XmlInstance implements Serializable { + + public static final AccessRight UNKNOWN_ACCESS_RIGHT; + + static { + UNKNOWN_ACCESS_RIGHT = new AccessRight(); + UNKNOWN_ACCESS_RIGHT.setClassid(ModelConstants.UNKNOWN); + UNKNOWN_ACCESS_RIGHT.setClassname(ModelConstants.UNKNOWN); + UNKNOWN_ACCESS_RIGHT.setSchemeid(ModelConstants.DNET_ACCESS_MODES); + UNKNOWN_ACCESS_RIGHT.setSchemename(ModelConstants.DNET_ACCESS_MODES); + } + + private String url; + + private AccessRight accessright; + + private Set collectedfrom = Sets.newHashSet(); + + private Set hostedby = Sets.newHashSet(); + + private Set instancetype = Sets.newHashSet(); + + private Set license = Sets.newHashSet(); + + // other research products specifc + private Set distributionlocation = Sets.newHashSet(); + + private Set pid = Sets.newHashSet(); + + private Set alternateIdentifier = Sets.newHashSet(); + + private Set dateofacceptance = Sets.newHashSet(); + + // ( article | book ) processing charges. Defined here to cope with possible wrongly typed + // results + private String processingchargeamount; + + // currency - alphabetic code describe in ISO-4217. Defined here to cope with possible wrongly + // typed results + private String processingchargecurrency; + + private Set refereed = Sets.newHashSet();; // peer-review status + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public AccessRight getAccessright() { + return accessright; + } + + public void setAccessright(AccessRight accessright) { + this.accessright = accessright; + } + + public Set getCollectedfrom() { + return collectedfrom; + } + + public void setCollectedfrom(Set collectedfrom) { + this.collectedfrom = collectedfrom; + } + + public Set getHostedby() { + return hostedby; + } + + public void setHostedby(Set hostedby) { + this.hostedby = hostedby; + } + + public Set getInstancetype() { + return instancetype; + } + + public void setInstancetype(Set instancetype) { + this.instancetype = instancetype; + } + + public Set getLicense() { + return license; + } + + public void setLicense(Set license) { + this.license = license; + } + + public Set getDistributionlocation() { + return distributionlocation; + } + + public void setDistributionlocation(Set distributionlocation) { + this.distributionlocation = distributionlocation; + } + + public Set getPid() { + return pid; + } + + public void setPid(Set pid) { + this.pid = pid; + } + + public Set getAlternateIdentifier() { + return alternateIdentifier; + } + + public void setAlternateIdentifier(Set alternateIdentifier) { + this.alternateIdentifier = alternateIdentifier; + } + + public Set getDateofacceptance() { + return dateofacceptance; + } + + public void setDateofacceptance(Set dateofacceptance) { + this.dateofacceptance = dateofacceptance; + } + + public String getProcessingchargeamount() { + return processingchargeamount; + } + + public void setProcessingchargeamount(String processingchargeamount) { + this.processingchargeamount = processingchargeamount; + } + + public String getProcessingchargecurrency() { + return processingchargecurrency; + } + + public void setProcessingchargecurrency(String processingchargecurrency) { + this.processingchargecurrency = processingchargecurrency; + } + + public Set getRefereed() { + return refereed; + } + + public void setRefereed(Set refereed) { + this.refereed = refereed; + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java index 7487f0956..87c0261ac 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java @@ -8,11 +8,16 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; +import javax.swing.text.html.Option; + import org.apache.commons.lang3.StringUtils; import org.stringtemplate.v4.ST; +import com.google.common.collect.Lists; + import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.OafEntity; @@ -94,13 +99,15 @@ public class TemplateFactory { } public String getInstance( - final String resultId, final List instancemetadata, final List webresources) { + final List instancemetadata, final String url) { return getTemplate(resources.getInstance()) - .add("instanceId", escapeXml(removePrefix(resultId))) .add("metadata", instancemetadata) .add( "webresources", - (webresources != null ? webresources : new ArrayList()) + Optional + .ofNullable(url) + .map(u -> Lists.newArrayList(url)) + .orElse(Lists.newArrayList()) .stream() .filter(StringUtils::isNotBlank) .map(this::getWebResource) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 19300d77d..6926f90d0 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -10,6 +10,7 @@ import java.io.IOException; import java.io.Serializable; import java.io.StringReader; import java.io.StringWriter; +import java.net.URL; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -17,7 +18,10 @@ import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collector; import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; @@ -27,7 +31,11 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.solr.common.util.URLUtil; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -49,25 +57,9 @@ import com.mycila.xmltool.XMLTag; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.MainEntityType; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Datasource; -import eu.dnetlib.dhp.schema.oaf.ExternalReference; -import eu.dnetlib.dhp.schema.oaf.ExtraInfo; -import eu.dnetlib.dhp.schema.oaf.Instance; -import eu.dnetlib.dhp.schema.oaf.Journal; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.Organization; -import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; -import eu.dnetlib.dhp.schema.oaf.Project; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.oa.provision.model.XmlInstance; +import eu.dnetlib.dhp.schema.common.*; +import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { @@ -1112,37 +1104,64 @@ public class XmlRecordFactory implements Serializable { if (MainEntityType.result.toString().equals(ModelSupport.getMainType(entityType))) { final List instances = ((Result) entity).getInstance(); if (instances != null) { - for (final Instance instance : ((Result) entity).getInstance()) { + groupInstancesByUrl(((Result) entity).getInstance()).forEach(instance -> { final List fields = Lists.newArrayList(); if (instance.getAccessright() != null && !instance.getAccessright().isBlank()) { fields .add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); } - if (instance.getCollectedfrom() != null && kvNotBlank(instance.getCollectedfrom())) { + if (instance.getCollectedfrom() != null) { fields - .add(XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); + .addAll( + instance + .getCollectedfrom() + .stream() + .filter(cf -> kvNotBlank(cf)) + .map(cf -> XmlSerializationUtils.mapKeyValue("collectedfrom", cf)) + .collect(Collectors.toList())); } - if (instance.getHostedby() != null && kvNotBlank(instance.getHostedby())) { - fields.add(XmlSerializationUtils.mapKeyValue("hostedby", instance.getHostedby())); - } - if (instance.getDateofacceptance() != null - && isNotBlank(instance.getDateofacceptance().getValue())) { + + if (instance.getHostedby() != null) { fields - .add( - XmlSerializationUtils - .asXmlElement("dateofacceptance", instance.getDateofacceptance().getValue())); + .addAll( + instance + .getHostedby() + .stream() + .filter(hb -> kvNotBlank(hb)) + .map(hb -> XmlSerializationUtils.mapKeyValue("hostedby", hb)) + .collect(Collectors.toList())); } - if (instance.getInstancetype() != null && !instance.getInstancetype().isBlank()) { + if (instance.getDateofacceptance() != null) { fields - .add(XmlSerializationUtils.mapQualifier("instancetype", instance.getInstancetype())); + .addAll( + instance + .getDateofacceptance() + .stream() + .filter(d -> isNotBlank(d)) + .map(d -> XmlSerializationUtils.asXmlElement("dateofacceptance", d)) + .collect(Collectors.toList())); } - if (isNotBlank(instance.getDistributionlocation())) { + if (instance.getInstancetype() != null) { fields - .add( - XmlSerializationUtils - .asXmlElement("distributionlocation", instance.getDistributionlocation())); + .addAll( + instance + .getInstancetype() + .stream() + .filter(t -> !t.isBlank()) + .map(t -> XmlSerializationUtils.mapQualifier("instancetype", t)) + .collect(Collectors.toList())); + } + if (instance.getDistributionlocation() != null) { + fields + .addAll( + instance + .getDistributionlocation() + .stream() + .filter(d -> isNotBlank(d)) + .map(d -> XmlSerializationUtils.asXmlElement("distributionlocation", d)) + .collect(Collectors.toList())); } if (instance.getPid() != null) { fields @@ -1165,32 +1184,39 @@ public class XmlRecordFactory implements Serializable { .collect(Collectors.toList())); } - if (instance.getRefereed() != null && !instance.getRefereed().isBlank()) { + if (instance.getRefereed() != null) { fields - .add(XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed())); + .addAll( + instance + .getRefereed() + .stream() + .filter(Objects::nonNull) + .filter(r -> !r.isBlank()) + .map(r -> XmlSerializationUtils.mapQualifier("refereed", r)) + .collect(Collectors.toList())); } if (instance.getProcessingchargeamount() != null - && isNotBlank(instance.getProcessingchargeamount().getValue())) { + && isNotBlank(instance.getProcessingchargeamount())) { fields .add( XmlSerializationUtils .asXmlElement( - "processingchargeamount", instance.getProcessingchargeamount().getValue())); + "processingchargeamount", instance.getProcessingchargeamount())); } if (instance.getProcessingchargecurrency() != null - && isNotBlank(instance.getProcessingchargecurrency().getValue())) { + && isNotBlank(instance.getProcessingchargecurrency())) { fields .add( XmlSerializationUtils .asXmlElement( - "processingchargecurrency", instance.getProcessingchargecurrency().getValue())); + "processingchargecurrency", instance.getProcessingchargecurrency())); } children .add( templateFactory - .getInstance(instance.getHostedby().getKey(), fields, instance.getUrl())); - } + .getInstance(fields, instance.getUrl())); + }); } final List ext = ((Result) entity).getExternalReference(); if (ext != null) { @@ -1234,6 +1260,85 @@ public class XmlRecordFactory implements Serializable { return children; } + private Stream groupInstancesByUrl(List instance) { + return instance + .stream() + .map(i -> { + i + .setUrl( + i + .getUrl() + .stream() + .filter(this::isValidUrl) + .collect(Collectors.toList())); + return i; + }) + .filter( + i -> Optional + .ofNullable(i.getUrl()) + .map(u -> !u.isEmpty()) + .orElse(false)) + .map(this::pickByUrl) + .collect(Collectors.groupingBy(ImmutablePair::getLeft)) + .values() + .stream() + .map(this::mergeInstances); + } + + private boolean isValidUrl(String url) { + try { + new URL(url).toURI(); + return true; + } catch (Exception e) { + return false; + } + } + + private ImmutablePair pickByUrl(Instance i) { + return new ImmutablePair<>(i.getUrl().get(0), i); + } + + private XmlInstance mergeInstances(List> instances) { + + final XmlInstance instance = new XmlInstance(); + + instance.setUrl(instances.get(0).getLeft()); + instance + .setAccessright( + instances + .stream() + .map(Pair::getValue) + .map(Instance::getAccessright) + .min(new AccessRightComparator()) + .orElse(XmlInstance.UNKNOWN_ACCESS_RIGHT)); + + instances.forEach(p -> { + final Instance i = p.getRight(); + instance.getCollectedfrom().add(i.getCollectedfrom()); + instance.getHostedby().add(i.getHostedby()); + instance.getInstancetype().add(i.getInstancetype()); + instance.getLicense().add(i.getLicense().getValue()); + instance.getDistributionlocation().add(i.getDistributionlocation()); + instance.getPid().addAll(i.getPid()); + instance.getAlternateIdentifier().addAll(i.getAlternateIdentifier()); + instance.getDateofacceptance().add(i.getDateofacceptance().getValue()); + instance + .setProcessingchargeamount( + Optional.ofNullable(i.getProcessingchargeamount()).map(apc -> apc.getValue()).orElse(null)); + instance + .setProcessingchargecurrency( + Optional.ofNullable(i.getProcessingchargecurrency()).map(c -> c.getValue()).orElse(null)); + instance.getRefereed().add(i.getRefereed()); + }); + + if (instance.getHostedby().size() > 1 + && instance.getHostedby().stream().anyMatch(hb -> ModelConstants.UNKNOWN_REPOSITORY.equals(hb))) { + instance.getHostedby().remove(ModelConstants.UNKNOWN_REPOSITORY); + } + + return instance; + } + private boolean isDuplicate(final RelatedEntityWrapper link) { return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType()); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/instance.st b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/instance.st index 64bed05b4..811d10936 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/instance.st +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/instance.st @@ -1,4 +1,4 @@ - + $metadata:{ it | $it$ }$ $webresources:{ it | $it$ }$ \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 2b5e08e92..a4b6182bc 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -12,7 +12,6 @@ import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.io.SAXReader; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import com.fasterxml.jackson.databind.DeserializationFeature; @@ -54,18 +53,19 @@ public class XmlRecordFactoryTest { System.out.println(doc.asXML()); - Assertions.assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid")); - Assertions.assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending")); + assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid")); + assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending")); - Assertions.assertEquals("0000-0001-9613-9956", doc.valueOf("//creator[@rank = '2']/@orcid")); - Assertions.assertEquals("", doc.valueOf("//creator[@rank = '2']/@orcid_pending")); + assertEquals("0000-0001-9613-9956", doc.valueOf("//creator[@rank = '2']/@orcid")); + assertEquals("", doc.valueOf("//creator[@rank = '2']/@orcid_pending")); - Assertions.assertEquals("doi", doc.valueOf("//instance/pid/@classid")); - Assertions.assertEquals("10.1109/TED.2018.2853550", doc.valueOf("//instance/pid/text()")); + assertEquals("doi", doc.valueOf("//instance/pid/@classid")); + assertEquals("10.1109/TED.2018.2853550", doc.valueOf("//instance/pid/text()")); - Assertions.assertEquals("doi", doc.valueOf("//instance/alternateidentifier/@classid")); - Assertions.assertEquals("10.5689/LIB.2018.2853550", doc.valueOf("//instance/alternateidentifier/text()")); - // TODO add assertions based of values extracted from the XML record + assertEquals("doi", doc.valueOf("//instance/alternateidentifier/@classid")); + assertEquals("10.5689/LIB.2018.2853550", doc.valueOf("//instance/alternateidentifier/text()")); + + assertEquals(3, doc.selectNodes("//instance").size()); } @Test @@ -96,7 +96,7 @@ public class XmlRecordFactoryTest { final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); System.out.println(doc.asXML()); - Assertions.assertEquals("2021-01-01", doc.valueOf("//validated/@date")); + assertEquals("2021-01-01", doc.valueOf("//validated/@date")); } @Test diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json index 91f159853..9794fd956 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json @@ -412,9 +412,800 @@ "value": "" }, "url": [ - "http://juuli.fi/Record/0331473718", "http://dx.doi.org/10.1109/TED.2018.2853550" ] + }, + { + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1109/TED.2018.2853550" + } + ], + "alternateIdentifier": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.5689/LIB.2018.2853550" + } + ], + "accessright": { + "classid": "CLOSED", + "classname": "Closed Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|od______3456::b2b9ce8435390bcbfc05f3cae3948567", + "value": "A wonderful repository" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "2020-01-01" + }, + "distributionlocation": "", + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c", + "value": "Unknown Repository" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "" + }, + "url": [ + "http://dx.doi.org/10.1109/TED.2018.2853550" + ] + }, + { + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1109/TED.2018.2853550" + } + ], + "alternateIdentifier": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.5689/LIB.2018.2853550" + } + ], + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|od______2367::dfb9c4r4353ghjcbfbnhf3cyu79484rf", + "value": "Another repository" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "2018-01-01" + }, + "distributionlocation": "", + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "" + }, + "url": [ + "http://dx.doi.org/10.1109/TED.2018.2853551" + ] + }, + { + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1109/TED.2018.2853550" + } + ], + "alternateIdentifier": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.5689/LIB.2018.2853550" + } + ], + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|od______2367::dfb9c4r4353ghjcbfbnhf3cyu79484rf", + "value": "Another repository" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "2018-01-01" + }, + "distributionlocation": "", + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c", + "value": "Unknown Repository" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "" + }, + "url": [ + "http://dx.doi.org/10.1109/TED.2018.2853552", + "http://dx.doi.org/10.1109/TED.2018.2853554" + ] + }, + { + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1109/TED.2018.2853550" + } + ], + "alternateIdentifier": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.5689/LIB.2018.2853550" + } + ], + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|od______2367::dfb9c4r4353ghjcbfbnhf3cyu79484rf", + "value": "Another repository" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "2018-01-01" + }, + "distributionlocation": "", + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c", + "value": "Unknown Repository" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "" + }, + "url": [ + ] + }, + { + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1109/TED.2018.2853550" + } + ], + "alternateIdentifier": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.5689/LIB.2018.2853550" + } + ], + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|od______2367::dfb9c4r4353ghjcbfbnhf3cyu79484rf", + "value": "Another repository" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "2018-01-01" + }, + "distributionlocation": "", + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c", + "value": "Unknown Repository" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "" + }, + "url": [ + "" + ] + }, + { + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1109/TED.2018.2853550" + } + ], + "alternateIdentifier": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.5689/LIB.2018.2853550" + } + ], + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|od______2367::dfb9c4r4353ghjcbfbnhf3cyu79484rf", + "value": "Another repository" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "2018-01-01" + }, + "distributionlocation": "", + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c", + "value": "Unknown Repository" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "" + }, + "url": [ + "asdasd://not a URL" + ] } ], "journal": { From 863a2f9db3497b5d2f8263f61adec878c51c8cba Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 3 Dec 2021 09:08:12 +0100 Subject: [PATCH 066/176] avoid to filter OAF records defined as invisible = true --- .../oaf/utils/GraphCleaningFunctions.java | 16 + .../clean/GraphCleaningFunctionsTest.java | 47 + .../dhp/oa/graph/clean/result_invisible.json | 958 ++++++++++++++++++ .../graph/clean/result_missing_invisible.json | 957 +++++++++++++++++ 4 files changed, 1978 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_invisible.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_missing_invisible.json diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 592580ab8..f5781390a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -86,6 +86,22 @@ public class GraphCleaningFunctions extends CleaningFunctions { } public static boolean filter(T value) { + if (Boolean.TRUE + .equals( + Optional + .ofNullable(value) + .map( + o -> Optional + .ofNullable(o.getDataInfo()) + .map( + d -> Optional + .ofNullable(d.getInvisible()) + .orElse(true)) + .orElse(true)) + .orElse(true))) { + return false; + } + if (value instanceof Datasource) { // nothing to evaluate here } else if (value instanceof Project) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index b69d0c08b..0264a6266 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -74,6 +74,53 @@ public class GraphCleaningFunctionsTest { } } + @Test + void testFilter_false() throws Exception { + + assertNotNull(vocabularies); + assertNotNull(mapping); + + String json = IOUtils + .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result_invisible.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + + assertTrue(p_in instanceof Result); + assertTrue(p_in instanceof Publication); + + assertEquals(false, GraphCleaningFunctions.filter(p_in)); + } + + @Test + void testFilter_true() throws Exception { + + assertNotNull(vocabularies); + assertNotNull(mapping); + + String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + + assertTrue(p_in instanceof Result); + assertTrue(p_in instanceof Publication); + + assertEquals(true, GraphCleaningFunctions.filter(p_in)); + } + + @Test + void testFilter_missing_invisible() throws Exception { + + assertNotNull(vocabularies); + assertNotNull(mapping); + + String json = IOUtils + .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result_missing_invisible.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + + assertTrue(p_in instanceof Result); + assertTrue(p_in instanceof Publication); + + assertEquals(true, GraphCleaningFunctions.filter(p_in)); + } + @Test void testCleaning() throws Exception { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_invisible.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_invisible.json new file mode 100644 index 000000000..ffcb187c1 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_invisible.json @@ -0,0 +1,958 @@ +{ + "author": [ + { + "affiliation": [ + ], + "fullname": "Brien, Tom", + "name": "Tom", + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID12", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID12", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "https://orcid.org/0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "orcid", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + } + ], + "rank": 1, + "surname": "Brien" + }, + { + "affiliation": [ + ], + "fullname": "Ade, Peter", + "name": "Peter", + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "xyz", + "classname": "XYZ", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "qwerty" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID", + "classname": "ORCID", + "schemeid": "", + "schemename": "" + }, + "value": "asdasd" + } + ], + "rank": 2, + "surname": "Ade" + }, + { + "affiliation": [ + ], + "fullname": "Barry, Peter S.", + "name": "Peter S.", + "pid": null, + "rank": 3, + "surname": "Barry" + }, + { + "affiliation": [ + ], + "fullname": "Dunscombe, Chris J.", + "name": "Chris J.", + "pid": [ + ], + "rank": 4, + "surname": "Dunscombe" + }, + { + "affiliation": [ + ], + "fullname": "Leadley, David R.", + "name": "David R.", + "pid": [ + ], + "rank": 5, + "surname": "Leadley" + }, + { + "affiliation": [ + ], + "fullname": "Morozov, Dmitry V.", + "name": "Dmitry V.", + "pid": [ + ], + "rank": 6, + "surname": "Morozov" + }, + { + "affiliation": [ + ], + "fullname": "Myronov, Maksym", + "name": "Maksym", + "pid": [ + ], + "rank": 7, + "surname": "Myronov" + }, + { + "affiliation": [ + ], + "fullname": "Parker, Evan", + "name": "Evan", + "pid": [ + ], + "rank": 8, + "surname": "Parker" + }, + { + "affiliation": [ + ], + "fullname": "Prest, Martin J.", + "name": "Martin J.", + "pid": [ + ], + "rank": 9, + "surname": "Prest" + }, + { + "affiliation": [ + ], + "fullname": "Prunnila, Mika", + "name": "Mika", + "pid": [ + ], + "rank": 10, + "surname": "Prunnila" + }, + { + "affiliation": [ + ], + "fullname": "Sudiwala, Rashmi V.", + "name": "Rashmi V.", + "pid": [ + ], + "rank": 11, + "surname": "Sudiwala" + }, + { + "affiliation": [ + ], + "fullname": "Whall, Terry E.", + "name": "Terry E.", + "pid": [ + ], + "rank": 12, + "surname": "Whall" + }, + { + "affiliation": [ + ], + "fullname": "Mauskopf", + "name": "", + "pid": [ + ], + "rank": 13, + "surname": "" + }, + { + "affiliation": [ + ], + "fullname": " P. D. ", + "name": "", + "pid": [ + ], + "rank": 14, + "surname": "" + } + ], + "bestaccessright": null, + "publisher": { + "value": null + }, + "collectedfrom": [ + { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + } + ], + "context": [ + ], + "contributor": [ + ], + "country": [ + { + "classid": "DE", + "classname": "DE", + "schemeid": "dnet:countries", + "schemename": "dnet:countries" + } + ], + "coverage": [ + ], + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": true, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "7 oct 1970" + }, + "dateofcollection": "", + "dateoftransformation": "2020-04-22T12:34:08.009Z", + "description": [ + ], + "externalReference": [ + ], + "extraInfo": [ + ], + "format": [ + ], + "fulltext": [ + ], + "id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375", + "instance": [ + { + "pid": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1008/abcd" + } + ], + "alternateIdentifier": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1009/qwerty" + } + ], + "accessright": { + "classid": "CLOSED", + "classname": "CLOSED", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "2016-01-01" + }, + "distributionlocation": "", + "hostedby": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "Comment/debate", + "classname": "Comment/debate", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "url": [ + "http://juuli.fi/Record/0275158616", + "http://dx.doi.org/10.1007/s109090161569x" + ] + } + ], + "journal": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "edition": "", + "ep": " 7", + "iss": "9 March", + "issnLinking": "", + "issnOnline": "", + "issnPrinted": "0022-2291", + "name": "Journal of Low Temperature Physics - Early Acces", + "sp": "1 ", + "vol": "" + }, + "language": { + "classid": "UNKNOWN", + "classname": "UNKNOWN", + "schemeid": "dnet:languages", + "schemename": "dnet:languages" + }, + "lastupdatetimestamp": 1591283286319, + "oaiprovenance": { + "originDescription": { + "altered": true, + "baseURL": "https%3A%2F%2Fvirta-jtp.csc.fi%2Fapi%2Fcerif", + "datestamp": "2019-07-30", + "harvestDate": "2020-04-22T11:04:38.685Z", + "identifier": "oai:virta-jtp.csc.fi:Publications/0275158616", + "metadataNamespace": "" + } + }, + "originalId": [ + "CSC_________::2250a70c903c6ac6e4c01438259e9375" + ], + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "" + } + ], + "relevantdate": [ + ], + "resourcetype": { + "classid": "0001", + "classname": "0001", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemeid": "dnet:result_typologies", + "schemename": "dnet:result_typologies" + }, + "source": [ + ], + "subject": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "ta213" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "infrared detectors" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "lens antennas" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "slot antennas" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "strained silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "cold electron bolometers" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "doped silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "measure noise" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "noise equivalent power" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "optical characterisation" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "optical response" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "photon noise" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "silicon absorbers" + } + ], + "title": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "Optical response of strained- and unstrained-silicon cold-electron bolometers test" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "test test 123 test" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "omic" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "「マキャベリ的知性と心の理論の進化論」 リチャード・バーン, アンドリュー・ホワイトゥン 編/藤田和生, 山下博志, 友永雅巳 監訳" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "-" + } + ] +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_missing_invisible.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_missing_invisible.json new file mode 100644 index 000000000..ac6884741 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_missing_invisible.json @@ -0,0 +1,957 @@ +{ + "author": [ + { + "affiliation": [ + ], + "fullname": "Brien, Tom", + "name": "Tom", + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID12", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID12", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "https://orcid.org/0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "orcid", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + } + ], + "rank": 1, + "surname": "Brien" + }, + { + "affiliation": [ + ], + "fullname": "Ade, Peter", + "name": "Peter", + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "xyz", + "classname": "XYZ", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "qwerty" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID", + "classname": "ORCID", + "schemeid": "", + "schemename": "" + }, + "value": "asdasd" + } + ], + "rank": 2, + "surname": "Ade" + }, + { + "affiliation": [ + ], + "fullname": "Barry, Peter S.", + "name": "Peter S.", + "pid": null, + "rank": 3, + "surname": "Barry" + }, + { + "affiliation": [ + ], + "fullname": "Dunscombe, Chris J.", + "name": "Chris J.", + "pid": [ + ], + "rank": 4, + "surname": "Dunscombe" + }, + { + "affiliation": [ + ], + "fullname": "Leadley, David R.", + "name": "David R.", + "pid": [ + ], + "rank": 5, + "surname": "Leadley" + }, + { + "affiliation": [ + ], + "fullname": "Morozov, Dmitry V.", + "name": "Dmitry V.", + "pid": [ + ], + "rank": 6, + "surname": "Morozov" + }, + { + "affiliation": [ + ], + "fullname": "Myronov, Maksym", + "name": "Maksym", + "pid": [ + ], + "rank": 7, + "surname": "Myronov" + }, + { + "affiliation": [ + ], + "fullname": "Parker, Evan", + "name": "Evan", + "pid": [ + ], + "rank": 8, + "surname": "Parker" + }, + { + "affiliation": [ + ], + "fullname": "Prest, Martin J.", + "name": "Martin J.", + "pid": [ + ], + "rank": 9, + "surname": "Prest" + }, + { + "affiliation": [ + ], + "fullname": "Prunnila, Mika", + "name": "Mika", + "pid": [ + ], + "rank": 10, + "surname": "Prunnila" + }, + { + "affiliation": [ + ], + "fullname": "Sudiwala, Rashmi V.", + "name": "Rashmi V.", + "pid": [ + ], + "rank": 11, + "surname": "Sudiwala" + }, + { + "affiliation": [ + ], + "fullname": "Whall, Terry E.", + "name": "Terry E.", + "pid": [ + ], + "rank": 12, + "surname": "Whall" + }, + { + "affiliation": [ + ], + "fullname": "Mauskopf", + "name": "", + "pid": [ + ], + "rank": 13, + "surname": "" + }, + { + "affiliation": [ + ], + "fullname": " P. D. ", + "name": "", + "pid": [ + ], + "rank": 14, + "surname": "" + } + ], + "bestaccessright": null, + "publisher": { + "value": null + }, + "collectedfrom": [ + { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + } + ], + "context": [ + ], + "contributor": [ + ], + "country": [ + { + "classid": "DE", + "classname": "DE", + "schemeid": "dnet:countries", + "schemename": "dnet:countries" + } + ], + "coverage": [ + ], + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "7 oct 1970" + }, + "dateofcollection": "", + "dateoftransformation": "2020-04-22T12:34:08.009Z", + "description": [ + ], + "externalReference": [ + ], + "extraInfo": [ + ], + "format": [ + ], + "fulltext": [ + ], + "id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375", + "instance": [ + { + "pid": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1008/abcd" + } + ], + "alternateIdentifier": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1009/qwerty" + } + ], + "accessright": { + "classid": "CLOSED", + "classname": "CLOSED", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "2016-01-01" + }, + "distributionlocation": "", + "hostedby": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "Comment/debate", + "classname": "Comment/debate", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "url": [ + "http://juuli.fi/Record/0275158616", + "http://dx.doi.org/10.1007/s109090161569x" + ] + } + ], + "journal": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "edition": "", + "ep": " 7", + "iss": "9 March", + "issnLinking": "", + "issnOnline": "", + "issnPrinted": "0022-2291", + "name": "Journal of Low Temperature Physics - Early Acces", + "sp": "1 ", + "vol": "" + }, + "language": { + "classid": "UNKNOWN", + "classname": "UNKNOWN", + "schemeid": "dnet:languages", + "schemename": "dnet:languages" + }, + "lastupdatetimestamp": 1591283286319, + "oaiprovenance": { + "originDescription": { + "altered": true, + "baseURL": "https%3A%2F%2Fvirta-jtp.csc.fi%2Fapi%2Fcerif", + "datestamp": "2019-07-30", + "harvestDate": "2020-04-22T11:04:38.685Z", + "identifier": "oai:virta-jtp.csc.fi:Publications/0275158616", + "metadataNamespace": "" + } + }, + "originalId": [ + "CSC_________::2250a70c903c6ac6e4c01438259e9375" + ], + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "" + } + ], + "relevantdate": [ + ], + "resourcetype": { + "classid": "0001", + "classname": "0001", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemeid": "dnet:result_typologies", + "schemename": "dnet:result_typologies" + }, + "source": [ + ], + "subject": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "ta213" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "infrared detectors" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "lens antennas" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "slot antennas" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "strained silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "cold electron bolometers" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "doped silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "measure noise" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "noise equivalent power" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "optical characterisation" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "optical response" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "photon noise" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "silicon absorbers" + } + ], + "title": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "Optical response of strained- and unstrained-silicon cold-electron bolometers test" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "test test 123 test" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "omic" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "「マキャベリ的知性と心の理論の進化論」 リチャード・バーン, アンドリュー・ホワイトゥン 編/藤田和生, 山下博志, 友永雅巳 監訳" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "-" + } + ] +} \ No newline at end of file From f7011b90d8397a054d557a088b0439bc18af2b0e Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 3 Dec 2021 11:15:09 +0100 Subject: [PATCH 067/176] format code --- .../test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala index 477a4326a..8c8b4b5bf 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala @@ -18,6 +18,7 @@ import java.text.SimpleDateFormat import java.util.Locale import scala.io.Source import org.junit.jupiter.api.Assertions._ + @ExtendWith(Array(classOf[MockitoExtension])) class DataciteToOAFTest extends AbstractVocabularyTest{ From 0fa0ce33d6d8e69079d6f55b10c89eca6911aedd Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 3 Dec 2021 11:47:35 +0100 Subject: [PATCH 068/176] removed duplicated on gitignore --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index f4fb46f2e..0b794ef74 100644 --- a/.gitignore +++ b/.gitignore @@ -3,8 +3,6 @@ *.iws *.ipr *.iml -*.ipr -*.iws *~ .vscode .metals From 4bb1d43afc45493673bf7055c7b332bb42189274 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 3 Dec 2021 12:35:51 +0100 Subject: [PATCH 069/176] - --- .../dnetlib/dhp/oa/graph/dump/DumpJobTest.java | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 37ba96abe..6dc56c846 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -917,8 +917,7 @@ public class DumpJobTest { DumpProducts dump = new DumpProducts(); dump .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -949,6 +948,19 @@ public class DumpJobTest { Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1); + temp = spark + .sql( + "select id, inst.articleprocessingcharge.amount, inst.articleprocessingcharge.currency " + + "from check " + + "lateral view explode (instance) i as inst " + + "where inst.articleprocessingcharge is not null"); + + + Assertions.assertEquals("3131.64", temp.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'").collectAsList().get(0).getString(1)); + Assertions.assertEquals("EUR", temp.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'").collectAsList().get(0).getString(2)); + + Assertions.assertEquals("2578.35", temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").collectAsList().get(0).getString(1)); + Assertions.assertEquals("EUR", temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").collectAsList().get(0).getString(2)); } } From 08795cbd302dd8c5f176b8f8d64444d4a3a854d0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 6 Dec 2021 10:39:56 +0100 Subject: [PATCH 070/176] using helper method from ModelSupport to find the inverse relation descriptor --- .../main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java index 7d0d6b0b8..380991526 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java @@ -90,7 +90,7 @@ public class ReadBlacklistFromDB implements Closeable { inverse.setSource(target_direct); String encoding = rs.getString("relationship"); - RelationInverse ri = ModelSupport.relationInverseMap.get(encoding); + RelationInverse ri = ModelSupport.findInverse(encoding); direct.setRelClass(ri.getRelClass()); inverse.setRelClass(ri.getInverseRelClass()); direct.setRelType(ri.getRelType()); From 91327277939f80c4b1ba938f902489a1f4687827 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 6 Dec 2021 10:54:05 +0100 Subject: [PATCH 071/176] fixed date cleaning test --- .../eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index 4068f0abb..8804469fa 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -107,7 +107,7 @@ class OafMapperUtilsTest { assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("2006-01-02T15:04:05+0000").get()); assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09-07:00").get()); assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09").get()); - assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get()); + assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get()); assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.3186369").get()); assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000").get()); assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.123").get()); From 7af0bbd0b1da976b628f123d9134724f4fa3cea9 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 6 Dec 2021 11:26:36 +0100 Subject: [PATCH 072/176] [scala-refactor] Module dhp-aggregation: Moved all scala source into src/main/scala and src/test/scala --- .../scholix/SparkCreateActionset.scala | 69 --------------- .../scholix/SparkSaveActionSet.scala | 86 ------------------- .../dhp/collection/CollectionUtils.scala | 0 .../dhp/datacite/AbstractRestClient.scala | 0 .../dhp/datacite/DataciteAPIImporter.scala | 0 .../dhp/datacite/DataciteModelConstants.scala | 2 +- .../DataciteToOAFTransformation.scala | 35 ++++---- .../GenerateDataciteDatasetSpark.scala | 0 .../dnetlib/dhp/datacite/ImportDatacite.scala | 0 .../SparkDownloadUpdateDatacite.scala | 0 .../eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala | 1 + .../bio/SparkTransformBioDatabaseToOAF.scala | 12 +-- .../ebi/SparkCreateBaselineDataFrame.scala | 2 +- .../sx/bio/ebi/SparkDownloadEBILinks.scala | 3 +- .../dhp/sx/bio/ebi/SparkEBILinksToOaf.scala | 5 +- .../dnetlib/dhp/sx/bio/pubmed/PMParser.scala | 0 .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 19 ++-- .../dhp/datacite/DataciteToOAFTest.scala | 2 +- .../dnetlib/dhp/sx/bio/BioScholixTest.scala | 0 19 files changed, 39 insertions(+), 197 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/collection/CollectionUtils.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/AbstractRestClient.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala (93%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/ImportDatacite.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala (99%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala (73%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala (98%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala (98%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala (93%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala (96%) rename dhp-workflows/dhp-aggregation/src/test/{java => scala}/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala (99%) rename dhp-workflows/dhp-aggregation/src/test/{java => scala}/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala (100%) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala deleted file mode 100644 index 7a87861db..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala +++ /dev/null @@ -1,69 +0,0 @@ -package eu.dnetlib.dhp.actionmanager.scholix - -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} -import org.apache.spark.SparkConf -import org.apache.spark.sql._ -import org.slf4j.{Logger, LoggerFactory} - -import scala.io.Source - -object SparkCreateActionset { - - def main(args: Array[String]): Unit = { - val log: Logger = LoggerFactory.getLogger(getClass) - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/actionset/generate_actionset.json")).mkString) - parser.parseArgument(args) - - - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - - val sourcePath = parser.get("sourcePath") - log.info(s"sourcePath -> $sourcePath") - - val targetPath = parser.get("targetPath") - log.info(s"targetPath -> $targetPath") - - val workingDirFolder = parser.get("workingDirFolder") - log.info(s"workingDirFolder -> $workingDirFolder") - - implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val resultEncoders: Encoder[Result] = Encoders.kryo[Result] - implicit val relationEncoders: Encoder[Relation] = Encoders.kryo[Relation] - - import spark.implicits._ - - val relation = spark.read.load(s"$sourcePath/relation").as[Relation] - - relation.filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) - .flatMap(r => List(r.getSource, r.getTarget)).distinct().write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/id_relation") - - - val idRelation = spark.read.load(s"$workingDirFolder/id_relation").as[String] - - log.info("extract source and target Identifier involved in relations") - - - log.info("save relation filtered") - - relation.filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) - .write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/actionSetOaf") - - log.info("saving entities") - - val entities: Dataset[(String, Result)] = spark.read.load(s"$sourcePath/entities/*").as[Result].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING, resultEncoders)) - - entities - .joinWith(idRelation, entities("_1").equalTo(idRelation("value"))) - .map(p => p._1._2) - .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala deleted file mode 100644 index 1df7ea3fb..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala +++ /dev/null @@ -1,86 +0,0 @@ -package eu.dnetlib.dhp.actionmanager.scholix - -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{Oaf, Dataset => OafDataset,Publication, Software, OtherResearchProduct, Relation} -import org.apache.hadoop.io.Text -import org.apache.hadoop.io.compress.GzipCodec -import org.apache.hadoop.mapred.SequenceFileOutputFormat -import org.apache.spark.SparkConf -import org.apache.spark.sql.{Encoder, Encoders, SparkSession} -import org.slf4j.{Logger, LoggerFactory} - -import scala.io.Source - -object SparkSaveActionSet { - - - def toActionSet(item: Oaf): (String, String) = { - val mapper = new ObjectMapper() - - item match { - case dataset: OafDataset => - val a: AtomicAction[OafDataset] = new AtomicAction[OafDataset] - a.setClazz(classOf[OafDataset]) - a.setPayload(dataset) - (dataset.getClass.getCanonicalName, mapper.writeValueAsString(a)) - case publication: Publication => - val a: AtomicAction[Publication] = new AtomicAction[Publication] - a.setClazz(classOf[Publication]) - a.setPayload(publication) - (publication.getClass.getCanonicalName, mapper.writeValueAsString(a)) - case software: Software => - val a: AtomicAction[Software] = new AtomicAction[Software] - a.setClazz(classOf[Software]) - a.setPayload(software) - (software.getClass.getCanonicalName, mapper.writeValueAsString(a)) - case orp: OtherResearchProduct => - val a: AtomicAction[OtherResearchProduct] = new AtomicAction[OtherResearchProduct] - a.setClazz(classOf[OtherResearchProduct]) - a.setPayload(orp) - (orp.getClass.getCanonicalName, mapper.writeValueAsString(a)) - - case relation: Relation => - val a: AtomicAction[Relation] = new AtomicAction[Relation] - a.setClazz(classOf[Relation]) - a.setPayload(relation) - (relation.getClass.getCanonicalName, mapper.writeValueAsString(a)) - case _ => - null - } - - } - - def main(args: Array[String]): Unit = { - val log: Logger = LoggerFactory.getLogger(getClass) - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/actionset/save_actionset.json")).mkString) - parser.parseArgument(args) - - - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - - val sourcePath = parser.get("sourcePath") - log.info(s"sourcePath -> $sourcePath") - - val targetPath = parser.get("targetPath") - log.info(s"targetPath -> $targetPath") - - implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val tEncoder: Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING) - - spark.read.load(sourcePath).as[Oaf] - .map(o => toActionSet(o)) - .filter(o => o != null) - .rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text, Text]], classOf[GzipCodec]) - - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala index 0685a04a3..6c5dc8cce 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala @@ -1,8 +1,8 @@ package eu.dnetlib.dhp.datacite import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.{DataInfo, KeyValue} import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils +import eu.dnetlib.dhp.schema.oaf.{DataInfo, KeyValue} import java.io.InputStream import java.time.format.DateTimeFormatter diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala similarity index 93% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index 6b4deef0a..a662cf99d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -6,7 +6,7 @@ import eu.dnetlib.dhp.datacite.DataciteModelConstants._ import eu.dnetlib.dhp.schema.action.AtomicAction import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} -import eu.dnetlib.dhp.schema.oaf.{AccessRight, Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _} import eu.dnetlib.dhp.utils.DHPUtils import org.apache.commons.lang3.StringUtils import org.json4s.DefaultFormats @@ -29,6 +29,7 @@ object DataciteToOAFTransformation { /** * This method should skip record if json contains invalid text * defined in gile datacite_filter + * * @param json * @return True if the record should be skipped */ @@ -107,9 +108,9 @@ object DataciteToOAFTransformation { d } - def fix_thai_date(input:String, format:String) :String = { + def fix_thai_date(input: String, format: String): String = { try { - val a_date = LocalDate.parse(input,DateTimeFormatter.ofPattern(format)) + val a_date = LocalDate.parse(input, DateTimeFormatter.ofPattern(format)) val d = ThaiBuddhistDate.of(a_date.getYear, a_date.getMonth.getValue, a_date.getDayOfMonth) LocalDate.from(d).toString } catch { @@ -236,7 +237,7 @@ object DataciteToOAFTransformation { val p = match_pattern.get._2 val grantId = m.matcher(awardUri).replaceAll("$2") val targetId = s"$p${DHPUtils.md5(grantId)}" - List( generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo) ) + List(generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo)) } else List() @@ -335,15 +336,15 @@ object DataciteToOAFTransformation { .map(d => d.get) if (a_date.isDefined) { - if(doi.startsWith("10.14457")) - result.setEmbargoenddate(OafMapperUtils.field(fix_thai_date(a_date.get,"[yyyy-MM-dd]"), null)) + if (doi.startsWith("10.14457")) + result.setEmbargoenddate(OafMapperUtils.field(fix_thai_date(a_date.get, "[yyyy-MM-dd]"), null)) else result.setEmbargoenddate(OafMapperUtils.field(a_date.get, null)) } if (i_date.isDefined && i_date.get.isDefined) { - if(doi.startsWith("10.14457")) { - result.setDateofacceptance(OafMapperUtils.field(fix_thai_date(i_date.get.get,"[yyyy-MM-dd]"), null)) - result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(fix_thai_date(i_date.get.get,"[yyyy-MM-dd]"), null)) + if (doi.startsWith("10.14457")) { + result.setDateofacceptance(OafMapperUtils.field(fix_thai_date(i_date.get.get, "[yyyy-MM-dd]"), null)) + result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(fix_thai_date(i_date.get.get, "[yyyy-MM-dd]"), null)) } else { result.setDateofacceptance(OafMapperUtils.field(i_date.get.get, null)) @@ -351,9 +352,9 @@ object DataciteToOAFTransformation { } } else if (publication_year != null) { - if(doi.startsWith("10.14457")) { - result.setDateofacceptance(OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year","[dd-MM-yyyy]"), null)) - result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year","[dd-MM-yyyy]"), null)) + if (doi.startsWith("10.14457")) { + result.setDateofacceptance(OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year", "[dd-MM-yyyy]"), null)) + result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year", "[dd-MM-yyyy]"), null)) } else { result.setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null)) @@ -457,7 +458,7 @@ object DataciteToOAFTransformation { JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier } yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType) - relations = relations ::: generateRelations(rels,result.getId, if (i_date.isDefined && i_date.get.isDefined) i_date.get.get else null) + relations = relations ::: generateRelations(rels, result.getId, if (i_date.isDefined && i_date.get.isDefined) i_date.get.get else null) } if (relations != null && relations.nonEmpty) { List(result) ::: relations @@ -466,7 +467,7 @@ object DataciteToOAFTransformation { List(result) } - private def generateRelations(rels: List[RelatedIdentifierType], id:String, date:String):List[Relation] = { + private def generateRelations(rels: List[RelatedIdentifierType], id: String, date: String): List[Relation] = { rels .filter(r => subRelTypeMapping.contains(r.relationType) && ( @@ -484,12 +485,12 @@ object DataciteToOAFTransformation { rel.setSubRelType(subRelType) rel.setRelClass(r.relationType) - val dateProps:KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date) + val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date) rel.setProperties(List(dateProps).asJava) rel.setSource(id) - rel.setTarget(DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier,r.relatedIdentifierType)) + rel.setTarget(DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)) rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) rel.getCollectedfrom.asScala.map(c => c.getValue).toList rel @@ -504,4 +505,4 @@ object DataciteToOAFTransformation { } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala similarity index 99% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala index 70dcc0184..853b24862 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala @@ -7,6 +7,7 @@ import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.{compact, parse, render} import collection.JavaConverters._ + object BioDBToOAF { case class EBILinkItem(id: Long, links: String) {} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala similarity index 73% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala index 8ae8285e3..fcceacd44 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala @@ -1,9 +1,9 @@ package eu.dnetlib.dhp.sx.bio import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.Oaf -import BioDBToOAF.ScholixResolved import eu.dnetlib.dhp.collection.CollectionUtils +import eu.dnetlib.dhp.schema.oaf.Oaf +import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} @@ -36,13 +36,13 @@ object SparkTransformBioDatabaseToOAF { import spark.implicits._ database.toUpperCase() match { case "UNIPROT" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "PDB" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "SCHOLIX" => - spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "CROSSREF_LINKS" => - spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 17d21f19c..660a26a6c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.oaf.Result -import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf} +import eu.dnetlib.dhp.sx.bio.pubmed._ import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala index eab6b1dc6..18e39387f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala @@ -1,9 +1,8 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal} import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem -import eu.dnetlib.dhp.sx.bio.pubmed.PMJournal +import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal} import org.apache.commons.io.IOUtils import org.apache.http.client.config.RequestConfig import org.apache.http.client.methods.HttpGet diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala similarity index 93% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala index 8da617ca0..12af4824b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala @@ -1,11 +1,10 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.collection.CollectionUtils import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.sx.bio.BioDBToOAF import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem -import BioDBToOAF.EBILinkItem -import eu.dnetlib.dhp.collection.CollectionUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -38,7 +37,7 @@ object SparkEBILinksToOaf { ebLinks.flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)) - .flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null) + .flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null) .write.mode(SaveMode.Overwrite).save(targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index ecef32202..4a93a7cb4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -4,7 +4,7 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf._ -import scala.collection.JavaConverters._ +import collection.JavaConverters._ import java.util.regex.Pattern @@ -22,10 +22,10 @@ object PubMedToOaf { val collectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") - /** * Cleaning the DOI Applying regex in order to * remove doi starting with URL + * * @param doi input DOI * @return cleaned DOI */ @@ -49,7 +49,7 @@ object PubMedToOaf { * starting from OAF instanceType value * * @param cobjQualifier OAF instance type - * @param vocabularies All dnet vocabularies + * @param vocabularies All dnet vocabularies * @return the correct instance */ def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = { @@ -65,7 +65,7 @@ object PubMedToOaf { } /** - * Mapping the Pubmedjournal info into the OAF Journale + * Mapping the Pubmedjournal info into the OAF Journale * * @param j the pubmedJournal * @return the OAF Journal @@ -91,9 +91,8 @@ object PubMedToOaf { * Find vocabulary term into synonyms and term in the vocabulary * * @param vocabularyName the input vocabulary name - * @param vocabularies all the vocabularies - * @param term the term to find - * + * @param vocabularies all the vocabularies + * @param term the term to find * @return the cleaned term value */ def getVocabularyTerm(vocabularyName: String, vocabularies: VocabularyGroup, term: String): Qualifier = { @@ -104,10 +103,9 @@ object PubMedToOaf { /** - * Map the Pubmed Article into the OAF instance + * Map the Pubmed Article into the OAF instance * - * - * @param article the pubmed articles + * @param article the pubmed articles * @param vocabularies the vocabularies * @return The OAF instance if the mapping did not fail */ @@ -185,7 +183,6 @@ object PubMedToOaf { //-------------------------------------------------------------------------------------- - // RESULT MAPPING //-------------------------------------------------------------------------------------- result.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala similarity index 99% rename from dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala rename to dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala index 8c8b4b5bf..5bb6ba67d 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala @@ -8,6 +8,7 @@ import org.apache.commons.io.FileUtils import org.apache.spark.SparkConf import org.apache.spark.sql.functions.{col, count} import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.extension.ExtendWith import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} import org.mockito.junit.jupiter.MockitoExtension @@ -17,7 +18,6 @@ import java.nio.file.{Files, Path} import java.text.SimpleDateFormat import java.util.Locale import scala.io.Source -import org.junit.jupiter.api.Assertions._ @ExtendWith(Array(classOf[MockitoExtension])) class DataciteToOAFTest extends AbstractVocabularyTest{ diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala rename to dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala From 81bf604059cac4887700bee192d25bd320f6ccd7 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 6 Dec 2021 11:29:24 +0100 Subject: [PATCH 073/176] [scala-refactor] Module dhp-common: Moved all scala source into src/main/scala and src/test/scala --- .../eu/dnetlib/dhp/application/SparkScalaApplication.scala | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dhp-common/src/main/{java => scala}/eu/dnetlib/dhp/application/SparkScalaApplication.scala (100%) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala similarity index 100% rename from dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala rename to dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala From bf880e250820025faa1e99fadb4e93c1d74a3ca8 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 6 Dec 2021 13:57:41 +0100 Subject: [PATCH 074/176] [scala-refactor] Module dhp-graph-mapper: Moved all scala source into src/main/scala and src/test/scala --- .../oa/graph/hostedbymap/Aggregators.scala | 2 +- .../SparkApplyHostedByMapToDatasource.scala | 13 +- .../SparkApplyHostedByMapToResult.scala | 19 ++- .../SparkPrepareHostedByInfoToApply.scala | 35 ++--- .../hostedbymap/SparkProduceHostedByMap.scala | 109 +++++++------- .../raw/CopyHdfsOafSparkApplication.scala | 8 +- .../resolution/SparkResolveEntities.scala | 5 +- .../resolution/SparkResolveRelation.scala | 2 +- .../sx/graphimport/SparkDataciteToOAF.scala | 1 - .../graph/SparkConvertDatasetToJsonRDD.scala | 10 +- .../sx/graph/SparkConvertObjectToJson.scala | 10 +- .../sx/graph/SparkConvertRDDtoDataset.scala | 27 ++-- .../dhp/sx/graph/SparkCreateInputGraph.scala | 27 ++-- .../dhp/sx/graph/SparkCreateScholix.scala | 28 ++-- .../sx/graph/SparkCreateSummaryObject.scala | 12 +- .../dhp/sx/graph/pangaea/PangaeaUtils.scala | 1 + .../SparkGeneratePanagaeaDataset.scala | 17 +-- .../dhp/sx/graph/scholix/ScholixUtils.scala | 141 ++++++++---------- .../dhp/oa/graph/hostedbymap/TestApply.scala | 0 .../oa/graph/hostedbymap/TestPrepare.scala | 4 - .../oa/graph/hostedbymap/TestPreprocess.scala | 5 +- .../resolution/ResolveEntitiesTest.scala | 0 .../sx/graph/scholix/ScholixGraphTest.scala | 0 23 files changed, 219 insertions(+), 257 deletions(-) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala (81%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala (83%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala (74%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala (61%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala (88%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala (95%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala (99%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala (96%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala (69%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala (83%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala (62%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala (76%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala (76%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala (68%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala (99%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala (83%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala (61%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala (96%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala (98%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala (100%) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index ce383292c..ad4e1c96e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -1,8 +1,8 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo -import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} import org.apache.spark.sql.expressions.Aggregator +import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala similarity index 81% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index 1b18ba3ae..38af3eee4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -2,13 +2,12 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult.{applyHBtoPubs, getClass} import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.{Datasource, Publication} +import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} @@ -52,18 +51,18 @@ object SparkApplyHostedByMapToDatasource { val mapper = new ObjectMapper() - val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") + val dats: Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") .map(r => mapper.readValue(r, classOf[Datasource])) - val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath) + val pinfo: Dataset[EntityInfo] = Aggregators.datasourceToSingleId(spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) - applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) + applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) spark.read.textFile(outputPath) .write .mode(SaveMode.Overwrite) - .option("compression","gzip") + .option("compression", "gzip") .text(graphPath + "/datasource") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala similarity index 83% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 0e047d016..204325982 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -5,16 +5,14 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils -import eu.dnetlib.dhp.schema.oaf.{Datasource, Instance, OpenAccessRoute, Publication} +import eu.dnetlib.dhp.schema.oaf.{Instance, OpenAccessRoute, Publication} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} - import scala.collection.JavaConverters._ - object SparkApplyHostedByMapToResult { def applyHBtoPubs(join: Dataset[EntityInfo], pubs: Dataset[Publication]) = { @@ -25,7 +23,7 @@ object SparkApplyHostedByMapToResult { val ei: EntityInfo = t2._2 val i = p.getInstance().asScala if (i.size == 1) { - val inst: Instance = i(0) + val inst: Instance = i.head inst.getHostedby.setKey(ei.getHostedById) inst.getHostedby.setValue(ei.getName) if (ei.getOpenAccess) { @@ -39,6 +37,7 @@ object SparkApplyHostedByMapToResult { p })(Encoders.bean(classOf[Publication])) } + def main(args: Array[String]): Unit = { @@ -67,18 +66,18 @@ object SparkApplyHostedByMapToResult { implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) val mapper = new ObjectMapper() - val pubs : Dataset[Publication] = spark.read.textFile(graphPath + "/publication") + val pubs: Dataset[Publication] = spark.read.textFile(graphPath + "/publication") .map(r => mapper.readValue(r, classOf[Publication])) - val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) - .map(ei => mapper.readValue(ei, classOf[EntityInfo])) + val pinfo: Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) + .map(ei => mapper.readValue(ei, classOf[EntityInfo])) - applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) + applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) spark.read.textFile(outputPath) .write .mode(SaveMode.Overwrite) - .option("compression","gzip") + .option("compression", "gzip") .text(graphPath + "/publication") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala similarity index 74% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index b7a7d352f..87e203e4b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -3,61 +3,58 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo - import eu.dnetlib.dhp.schema.oaf.{Journal, Publication} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} - - object SparkPrepareHostedByInfoToApply { implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { - var lst:List[EntityInfo] = List() + def getList(id: String, j: Journal, name: String): List[EntityInfo] = { + var lst: List[EntityInfo] = List() - if (j.getIssnLinking != null && !j.getIssnLinking.equals("")){ + if (j.getIssnLinking != null && !j.getIssnLinking.equals("")) { lst = EntityInfo.newInstance(id, j.getIssnLinking, name) :: lst } - if (j.getIssnOnline != null && !j.getIssnOnline.equals("")){ + if (j.getIssnOnline != null && !j.getIssnOnline.equals("")) { lst = EntityInfo.newInstance(id, j.getIssnOnline, name) :: lst } - if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")){ + if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")) { lst = EntityInfo.newInstance(id, j.getIssnPrinted, name) :: lst } lst } - def prepareResultInfo(spark:SparkSession, publicationPath:String) : Dataset[EntityInfo] = { + def prepareResultInfo(spark: SparkSession, publicationPath: String): Dataset[EntityInfo] = { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) val mapper = new ObjectMapper() - val dd : Dataset[Publication] = spark.read.textFile(publicationPath) + val dd: Dataset[Publication] = spark.read.textFile(publicationPath) .map(r => mapper.readValue(r, classOf[Publication])) - dd.filter(p => p.getJournal != null ).flatMap(p => getList(p.getId, p.getJournal, "")) + dd.filter(p => p.getJournal != null).flatMap(p => getList(p.getId, p.getJournal, "")) } - def toEntityInfo(input:String): EntityInfo = { + def toEntityInfo(input: String): EntityInfo = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]] + val c: Map[String, HostedByItemType] = json.extract[Map[String, HostedByItemType]] toEntityItem(c.keys.head, c.values.head) } - def toEntityItem(journal_id: String , hbi: HostedByItemType): EntityInfo = { + def toEntityItem(journal_id: String, hbi: HostedByItemType): EntityInfo = { EntityInfo.newInstance(hbi.id, journal_id, hbi.officialname, hbi.openAccess) @@ -67,7 +64,7 @@ object SparkPrepareHostedByInfoToApply { Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journalId").equalTo(hbm.col("journalId")), "left") .map(t2 => { val res: EntityInfo = t2._1 - if(t2._2 != null ){ + if (t2._2 != null) { val ds = t2._2 res.setHostedById(ds.getId) res.setOpenAccess(ds.getOpenAccess) @@ -107,10 +104,10 @@ object SparkPrepareHostedByInfoToApply { //STEP1: read the hostedbymap and transform it in EntityInfo - val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) + val hostedByInfo: Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) - //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) - val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") + //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) + val resultInfoDataset: Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") //STEP3: left join resultInfo with hostedByInfo on journal_id. Reduction of all the results with the same id in just //one entry (one result could be associated to issn and eissn and so possivly matching more than once against the map) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala similarity index 61% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index 1ee1d5d1a..6dfe35623 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -1,41 +1,39 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap +import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel} import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.commons.io.IOUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} -import com.fasterxml.jackson.databind.ObjectMapper -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.FileSystem -import org.apache.hadoop.fs.Path + import java.io.PrintWriter -import org.apache.hadoop.io.compress.GzipCodec - - object SparkProduceHostedByMap { implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) - def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = { + def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)): HostedByItemType = { val openaire: HostedByInfo = input._1._1 val doaj: HostedByInfo = input._1._2 val gold: HostedByInfo = input._2 val isOpenAccess: Boolean = doaj == null && gold == null openaire.journal_id match { - case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) - case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) - case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) + case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) + case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) + case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) // catch the default with a variable so you can print it - case whoa => null + case whoa => null } } @@ -44,7 +42,7 @@ object SparkProduceHostedByMap { implicit val formats = org.json4s.DefaultFormats - val map: Map [String, HostedByItemType] = Map (input._1 -> input._2 ) + val map: Map[String, HostedByItemType] = Map(input._1 -> input._2) Serialization.write(map) @@ -52,34 +50,33 @@ object SparkProduceHostedByMap { } - - def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { - if(issn != null){ - if(eissn != null){ - if(issnl != null){ - HostedByItemType(id, officialname, issn, eissn, issnl , oa) - }else{ - HostedByItemType(id, officialname, issn, eissn, "" , oa) + def getHostedByItemType(id: String, officialname: String, issn: String, eissn: String, issnl: String, oa: Boolean): HostedByItemType = { + if (issn != null) { + if (eissn != null) { + if (issnl != null) { + HostedByItemType(id, officialname, issn, eissn, issnl, oa) + } else { + HostedByItemType(id, officialname, issn, eissn, "", oa) } - }else{ - if(issnl != null){ - HostedByItemType(id, officialname, issn, "", issnl , oa) - }else{ - HostedByItemType(id, officialname, issn, "", "" , oa) + } else { + if (issnl != null) { + HostedByItemType(id, officialname, issn, "", issnl, oa) + } else { + HostedByItemType(id, officialname, issn, "", "", oa) } } - }else{ - if(eissn != null){ - if(issnl != null){ - HostedByItemType(id, officialname, "", eissn, issnl , oa) - }else{ - HostedByItemType(id, officialname, "", eissn, "" , oa) + } else { + if (eissn != null) { + if (issnl != null) { + HostedByItemType(id, officialname, "", eissn, issnl, oa) + } else { + HostedByItemType(id, officialname, "", eissn, "", oa) } - }else{ - if(issnl != null){ - HostedByItemType(id, officialname, "", "", issnl , oa) - }else{ - HostedByItemType("", "", "", "", "" , oa) + } else { + if (issnl != null) { + HostedByItemType(id, officialname, "", "", issnl, oa) + } else { + HostedByItemType("", "", "", "", "", oa) } } } @@ -90,10 +87,10 @@ object SparkProduceHostedByMap { return getHostedByItemType(dats.getId, dats.getOfficialname.getValue, dats.getJournal.getIssnPrinted, dats.getJournal.getIssnOnline, dats.getJournal.getIssnLinking, false) } - HostedByItemType("","","","","",false) + HostedByItemType("", "", "", "", "", false) } - def oaHostedByDataset(spark:SparkSession, datasourcePath : String) : Dataset[HostedByItemType] = { + def oaHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { import spark.implicits._ @@ -102,10 +99,10 @@ object SparkProduceHostedByMap { implicit var encoderD = Encoders.kryo[Datasource] - val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath) + val dd: Dataset[Datasource] = spark.read.textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[Datasource])) - dd.map{ddt => oaToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + dd.map { ddt => oaToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) } @@ -115,17 +112,17 @@ object SparkProduceHostedByMap { } - def goldHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { + def goldHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { import spark.implicits._ implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] val mapper = new ObjectMapper() - val dd : Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath) + val dd: Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[UnibiGoldModel])) - dd.map{ddt => goldToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + dd.map { ddt => goldToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) } @@ -134,41 +131,40 @@ object SparkProduceHostedByMap { return getHostedByItemType(Constants.DOAJ, doaj.getJournalTitle, doaj.getIssn, doaj.getEissn, "", true) } - def doajHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { + def doajHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { import spark.implicits._ implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] val mapper = new ObjectMapper() - val dd : Dataset[DOAJModel] = spark.read.textFile(datasourcePath) + val dd: Dataset[DOAJModel] = spark.read.textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[DOAJModel])) - dd.map{ddt => doajToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + dd.map { ddt => doajToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) } def toList(input: HostedByItemType): List[(String, HostedByItemType)] = { - var lst : List[(String, HostedByItemType)] = List() - if(!input.issn.equals("")){ + var lst: List[(String, HostedByItemType)] = List() + if (!input.issn.equals("")) { lst = (input.issn, input) :: lst } - if(!input.eissn.equals("")){ + if (!input.eissn.equals("")) { lst = (input.eissn, input) :: lst } - if(!input.lissn.equals("")){ + if (!input.lissn.equals("")) { lst = (input.lissn, input) :: lst } lst } - - def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode : String):Unit = { + def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode: String): Unit = { val conf = new Configuration() conf.set("fs.defaultFS", hdfsNameNode) - val fs= FileSystem.get(conf) + val fs = FileSystem.get(conf) val output = fs.create(new Path(outputPath)) val writer = new PrintWriter(output) try { @@ -182,7 +178,6 @@ object SparkProduceHostedByMap { } - def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) @@ -213,7 +208,7 @@ object SparkProduceHostedByMap { .union(doajHostedByDataset(spark, workingDirPath + "/doaj.json")) .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) .map(hbi => toHostedByMap(hbi))(Encoders.STRING) - .rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) + .rdd.saveAsTextFile(outputPath, classOf[GzipCodec]) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala similarity index 88% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala index c7ad1890d..91d2bafe3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala @@ -4,17 +4,11 @@ import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.schema.common.ModelSupport -import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.utils.DHPUtils -import org.apache.commons.io.IOUtils -import org.apache.commons.lang3.StringUtils -import org.apache.http.client.methods.HttpGet -import org.apache.http.impl.client.HttpClients import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.apache.spark.{SparkConf, SparkContext} import org.slf4j.LoggerFactory - import scala.collection.JavaConverters._ import scala.io.Source @@ -59,7 +53,7 @@ object CopyHdfsOafSparkApplication { if (validPaths.nonEmpty) { val oaf = spark.read.load(validPaths: _*).as[Oaf] val mapper = new ObjectMapper() - val l =ModelSupport.oafTypes.entrySet.asScala.map(e => e.getKey).toList + val l = ModelSupport.oafTypes.entrySet.asScala.map(e => e.getKey).toList l.foreach( e => oaf.filter(o => o.getClass.getSimpleName.equalsIgnoreCase(e)) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala similarity index 95% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala index be217c5c3..8e15063c2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -2,9 +2,8 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.schema.common.EntityType -import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset,_} import org.apache.commons.io.IOUtils import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.SparkConf @@ -75,7 +74,7 @@ object SparkResolveEntities { } } - def generateResolvedEntities(spark: SparkSession, workingPath: String, graphBasePath: String, targetPath:String) = { + def generateResolvedEntities(spark: SparkSession, workingPath: String, graphBasePath: String, targetPath: String) = { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index a194f2694..80c09940f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.HdfsSupport -import eu.dnetlib.dhp.schema.oaf.{Relation, Result} +import eu.dnetlib.dhp.schema.oaf.Relation import eu.dnetlib.dhp.utils.DHPUtils import org.apache.commons.io.IOUtils import org.apache.hadoop.fs.{FileSystem, Path} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala similarity index 96% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala index 9e905d806..9df3b41bd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala @@ -18,7 +18,6 @@ object SparkDataciteToOAF { .config(conf) .appName(getClass.getSimpleName) .master(parser.get("master")).getOrCreate() - import spark.implicits._ val sc = spark.sparkContext diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala similarity index 69% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala index 3ee0c7dd6..9d16cf907 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.Result import org.apache.commons.io.IOUtils import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf @@ -29,13 +29,13 @@ object SparkConvertDatasetToJsonRDD { val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - val resultObject = List("publication","dataset","software", "otherResearchProduct") + val resultObject = List("publication", "dataset", "software", "otherResearchProduct") val mapper = new ObjectMapper() - implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) - resultObject.foreach{item => - spark.read.load(s"$sourcePath/$item").as[Result].map(r=> mapper.writeValueAsString(r))(Encoders.STRING).rdd.saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) + resultObject.foreach { item => + spark.read.load(s"$sourcePath/$item").as[Result].map(r => mapper.writeValueAsString(r))(Encoders.STRING).rdd.saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala similarity index 83% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala index 846ac37af..cc1b97fd6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala @@ -5,10 +5,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.sx.scholix.Scholix import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import org.apache.commons.io.IOUtils +import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} -import org.apache.hadoop.io.compress._ object SparkConvertObjectToJson { @@ -32,8 +32,8 @@ object SparkConvertObjectToJson { log.info(s"objectType -> $objectType") - implicit val scholixEncoder :Encoder[Scholix]= Encoders.kryo[Scholix] - implicit val summaryEncoder :Encoder[ScholixSummary]= Encoders.kryo[ScholixSummary] + implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] + implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] val mapper = new ObjectMapper @@ -42,11 +42,11 @@ object SparkConvertObjectToJson { case "scholix" => log.info("Serialize Scholix") val d: Dataset[Scholix] = spark.read.load(sourcePath).as[Scholix] - d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(6000).saveAsTextFile(targetPath, classOf[GzipCodec]) + d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(6000).saveAsTextFile(targetPath, classOf[GzipCodec]) case "summary" => log.info("Serialize Summary") val d: Dataset[ScholixSummary] = spark.read.load(sourcePath).as[ScholixSummary] - d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(1000).saveAsTextFile(targetPath, classOf[GzipCodec]) + d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(1000).saveAsTextFile(targetPath, classOf[GzipCodec]) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala similarity index 62% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 4b82fe645..23f039c70 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -2,11 +2,12 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Software,Dataset => OafDataset} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} + object SparkConvertRDDtoDataset { def main(args: Array[String]): Unit = { @@ -31,39 +32,39 @@ object SparkConvertRDDtoDataset { val entityPath = s"$t/entities" val relPath = s"$t/relation" val mapper = new ObjectMapper() - implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) - implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) - implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) - implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) - implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) + implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) + implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) + implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) + implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) + implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) log.info("Converting dataset") - val rddDataset =spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])) + val rddDataset = spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])) spark.createDataset(rddDataset).as[OafDataset].write.mode(SaveMode.Overwrite).save(s"$entityPath/dataset") log.info("Converting publication") - val rddPublication =spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])) + val rddPublication = spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])) spark.createDataset(rddPublication).as[Publication].write.mode(SaveMode.Overwrite).save(s"$entityPath/publication") log.info("Converting software") - val rddSoftware =spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])) + val rddSoftware = spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])) spark.createDataset(rddSoftware).as[Software].write.mode(SaveMode.Overwrite).save(s"$entityPath/software") log.info("Converting otherresearchproduct") - val rddOtherResearchProduct =spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])) + val rddOtherResearchProduct = spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])) spark.createDataset(rddOtherResearchProduct).as[OtherResearchProduct].write.mode(SaveMode.Overwrite).save(s"$entityPath/otherresearchproduct") log.info("Converting Relation") - val relationSemanticFilter = List("cites", "iscitedby","merges", "ismergedin") + val relationSemanticFilter = List("cites", "iscitedby", "merges", "ismergedin") - val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation") + val rddRelation = spark.sparkContext.textFile(s"$sourcePath/relation") .map(s => mapper.readValue(s, classOf[Relation])) - .filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50")) + .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) .filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala index 350b00c5e..ed88cfaa6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala @@ -1,14 +1,12 @@ package eu.dnetlib.dhp.sx.graph import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset,_} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} - - object SparkCreateInputGraph { def main(args: Array[String]): Unit = { @@ -33,7 +31,7 @@ object SparkCreateInputGraph { ) - implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) @@ -41,16 +39,13 @@ object SparkCreateInputGraph { implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) - - - val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - val oafDs:Dataset[Oaf] = spark.read.load(s"$sourcePath/*").as[Oaf] + val oafDs: Dataset[Oaf] = spark.read.load(s"$sourcePath/*").as[Oaf] log.info("Extract Publication") @@ -70,27 +65,27 @@ object SparkCreateInputGraph { resultObject.foreach { r => log.info(s"Make ${r._1} unique") - makeDatasetUnique(s"$targetPath/extracted/${r._1}",s"$targetPath/preprocess/${r._1}",spark, r._2) + makeDatasetUnique(s"$targetPath/extracted/${r._1}", s"$targetPath/preprocess/${r._1}", spark, r._2) } } - def extractEntities[T <: Oaf ](oafDs:Dataset[Oaf], targetPath:String, clazz:Class[T], log:Logger) :Unit = { + def extractEntities[T <: Oaf](oafDs: Dataset[Oaf], targetPath: String, clazz: Class[T], log: Logger): Unit = { - implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) + implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) log.info(s"Extract ${clazz.getSimpleName}") oafDs.filter(o => o.isInstanceOf[T]).map(p => p.asInstanceOf[T]).write.mode(SaveMode.Overwrite).save(targetPath) } - def makeDatasetUnique[T <: Result ](sourcePath:String, targetPath:String, spark:SparkSession, clazz:Class[T]) :Unit = { + def makeDatasetUnique[T <: Result](sourcePath: String, targetPath: String, spark: SparkSession, clazz: Class[T]): Unit = { import spark.implicits._ - implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) + implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) - val ds:Dataset[T] = spark.read.load(sourcePath).as[T] + val ds: Dataset[T] = spark.read.load(sourcePath).as[T] - ds.groupByKey(_.getId).reduceGroups{(x,y) => + ds.groupByKey(_.getId).reduceGroups { (x, y) => x.mergeFrom(y) x }.map(_._2).write.mode(SaveMode.Overwrite).save(targetPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala index e4fcd2782..9930c57af 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala @@ -9,7 +9,7 @@ import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils.RelatedEntities import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.functions.count -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkCreateScholix { @@ -42,7 +42,7 @@ object SparkCreateScholix { val relationDS: Dataset[(String, Relation)] = spark.read.load(relationPath).as[Relation] - .filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + .filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder)) val summaryDS: Dataset[(String, ScholixSummary)] = spark.read.load(summaryPath).as[ScholixSummary] @@ -51,54 +51,54 @@ object SparkCreateScholix { relationDS.joinWith(summaryDS, relationDS("_1").equalTo(summaryDS("_1")), "left") .map { input: ((String, Relation), (String, ScholixSummary)) => - if (input._1!= null && input._2!= null) { + if (input._1 != null && input._2 != null) { val rel: Relation = input._1._2 val source: ScholixSummary = input._2._2 (rel.getTarget, ScholixUtils.scholixFromSource(rel, source)) } - else null + else null }(Encoders.tuple(Encoders.STRING, scholixEncoder)) - .filter(r => r!= null) + .filter(r => r != null) .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_from_source") val scholixSource: Dataset[(String, Scholix)] = spark.read.load(s"$targetPath/scholix_from_source").as[(String, Scholix)](Encoders.tuple(Encoders.STRING, scholixEncoder)) scholixSource.joinWith(summaryDS, scholixSource("_1").equalTo(summaryDS("_1")), "left") .map { input: ((String, Scholix), (String, ScholixSummary)) => - if (input._2== null) { + if (input._2 == null) { null } else { val s: Scholix = input._1._2 val target: ScholixSummary = input._2._2 ScholixUtils.generateCompleteScholix(s, target) } - }.filter(s => s!= null).write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_one_verse") + }.filter(s => s != null).write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_one_verse") val scholix_o_v: Dataset[Scholix] = spark.read.load(s"$targetPath/scholix_one_verse").as[Scholix] scholix_o_v.flatMap(s => List(s, ScholixUtils.createInverseScholixRelation(s))).as[Scholix] - .map(s=> (s.getIdentifier,s))(Encoders.tuple(Encoders.STRING, scholixEncoder)) + .map(s => (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, scholixEncoder)) .groupByKey(_._1) .agg(ScholixUtils.scholixAggregator.toColumn) .map(s => s._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix") - val scholix_final:Dataset[Scholix] = spark.read.load(s"$targetPath/scholix").as[Scholix] + val scholix_final: Dataset[Scholix] = spark.read.load(s"$targetPath/scholix").as[Scholix] - val stats:Dataset[(String,String,Long)]= scholix_final.map(s => (s.getSource.getDnetIdentifier, s.getTarget.getObjectType)).groupBy("_1", "_2").agg(count("_1")).as[(String,String,Long)] + val stats: Dataset[(String, String, Long)] = scholix_final.map(s => (s.getSource.getDnetIdentifier, s.getTarget.getObjectType)).groupBy("_1", "_2").agg(count("_1")).as[(String, String, Long)] stats - .map(s => RelatedEntities(s._1, if ("dataset".equalsIgnoreCase(s._2)) s._3 else 0, if ("publication".equalsIgnoreCase(s._2)) s._3 else 0 )) + .map(s => RelatedEntities(s._1, if ("dataset".equalsIgnoreCase(s._2)) s._3 else 0, if ("publication".equalsIgnoreCase(s._2)) s._3 else 0)) .groupByKey(_.id) - .reduceGroups((a, b) => RelatedEntities(a.id, a.relatedDataset+b.relatedDataset, a.relatedPublication+b.relatedPublication)) + .reduceGroups((a, b) => RelatedEntities(a.id, a.relatedDataset + b.relatedDataset, a.relatedPublication + b.relatedPublication)) .map(_._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/related_entities") - val relatedEntitiesDS:Dataset[RelatedEntities] = spark.read.load(s"$targetPath/related_entities").as[RelatedEntities].filter(r => r.relatedPublication>0 || r.relatedDataset > 0) + val relatedEntitiesDS: Dataset[RelatedEntities] = spark.read.load(s"$targetPath/related_entities").as[RelatedEntities].filter(r => r.relatedPublication > 0 || r.relatedDataset > 0) - relatedEntitiesDS.joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner").map{i => + relatedEntitiesDS.joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner").map { i => val re = i._1 val sum = i._2._2 diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala similarity index 68% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala index 0970375f5..4274cae5a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala @@ -6,7 +6,7 @@ import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkCreateSummaryObject { @@ -28,15 +28,15 @@ object SparkCreateSummaryObject { val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result] - implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resultEncoder: Encoder[Result] = Encoders.kryo[Result] + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val summaryEncoder:Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] + implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] - val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result].filter(r=>r.getDataInfo== null || r.getDataInfo.getDeletedbyinference== false) + val ds: Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result].filter(r => r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) - ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s!= null).write.mode(SaveMode.Overwrite).save(targetPath) + ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s != null).write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala index 193512474..c70397d04 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala @@ -5,6 +5,7 @@ import org.apache.spark.sql.{Encoder, Encoders} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse + import java.util.regex.Pattern import scala.language.postfixOps import scala.xml.{Elem, Node, XML} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala similarity index 83% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala index 79c75d6df..2717b7b80 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala @@ -2,11 +2,11 @@ package eu.dnetlib.dhp.sx.graph.pangaea import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.{SparkConf, SparkContext} import org.slf4j.{Logger, LoggerFactory} - import scala.collection.JavaConverters._ + import scala.io.Source object SparkGeneratePanagaeaDataset { @@ -28,17 +28,17 @@ object SparkGeneratePanagaeaDataset { parser.getObjectMap.asScala.foreach(s => logger.info(s"${s._1} -> ${s._2}")) logger.info("Converting sequential file into Dataset") - val sc:SparkContext = spark.sparkContext + val sc: SparkContext = spark.sparkContext - val workingPath:String = parser.get("workingPath") + val workingPath: String = parser.get("workingPath") implicit val pangaeaEncoders: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel] - val inputRDD:RDD[PangaeaDataModel] = sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s)) + val inputRDD: RDD[PangaeaDataModel] = sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s)) spark.createDataset(inputRDD).as[PangaeaDataModel] - .map(s => (s.identifier,s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders)) - .groupByKey(_._1)(Encoders.STRING) + .map(s => (s.identifier, s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders)) + .groupByKey(_._1)(Encoders.STRING) .agg(PangaeaUtils.getDatasetAggregator().toColumn) .map(s => s._2) .write.mode(SaveMode.Overwrite).save(s"$workingPath/dataset") @@ -46,7 +46,4 @@ object SparkGeneratePanagaeaDataset { } - - - } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala similarity index 61% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala index 93c554e04..bf81a26d4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala @@ -1,6 +1,5 @@ package eu.dnetlib.dhp.sx.graph.scholix - import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Result, StructuredProperty} import eu.dnetlib.dhp.schema.sx.scholix._ import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, Typology} @@ -10,23 +9,22 @@ import org.apache.spark.sql.{Encoder, Encoders} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse - import scala.collection.JavaConverters._ import scala.io.Source -import scala.language.postfixOps object ScholixUtils { val DNET_IDENTIFIER_SCHEMA: String = "DNET Identifier" - val DATE_RELATION_KEY:String = "RelationDate" - case class RelationVocabulary(original:String, inverse:String){} + val DATE_RELATION_KEY: String = "RelationDate" - case class RelatedEntities(id:String, relatedDataset:Long, relatedPublication:Long){} + case class RelationVocabulary(original: String, inverse: String) {} - val relations:Map[String, RelationVocabulary] = { - val input =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relations.json")).mkString + case class RelatedEntities(id: String, relatedDataset: Long, relatedPublication: Long) {} + + val relations: Map[String, RelationVocabulary] = { + val input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relations.json")).mkString implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) @@ -35,12 +33,12 @@ object ScholixUtils { } - def extractRelationDate(relation: Relation):String = { + def extractRelationDate(relation: Relation): String = { - if (relation.getProperties== null || !relation.getProperties.isEmpty) + if (relation.getProperties == null || !relation.getProperties.isEmpty) null else { - val date =relation.getProperties.asScala.find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey)).map(p => p.getValue) + val date = relation.getProperties.asScala.find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey)).map(p => p.getValue) if (date.isDefined) date.get else @@ -48,9 +46,9 @@ object ScholixUtils { } } - def extractRelationDate(summary: ScholixSummary):String = { + def extractRelationDate(summary: ScholixSummary): String = { - if(summary.getDate== null || summary.getDate.isEmpty) + if (summary.getDate == null || summary.getDate.isEmpty) null else { summary.getDate.get(0) @@ -59,15 +57,14 @@ object ScholixUtils { } - def inverseRelationShip(rel:ScholixRelationship):ScholixRelationship = { + def inverseRelationShip(rel: ScholixRelationship): ScholixRelationship = { new ScholixRelationship(rel.getInverse, rel.getSchema, rel.getName) } - - val statsAggregator:Aggregator[(String,String, Long), RelatedEntities, RelatedEntities] = new Aggregator[(String,String, Long), RelatedEntities, RelatedEntities] with Serializable { + val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] = new Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] with Serializable { override def zero: RelatedEntities = null override def reduce(b: RelatedEntities, a: (String, String, Long)): RelatedEntities = { @@ -78,17 +75,16 @@ object ScholixUtils { if (b == null) RelatedEntities(a._1, relatedDataset, relatedPublication) else - RelatedEntities(a._1,b.relatedDataset+ relatedDataset, b.relatedPublication+ relatedPublication ) + RelatedEntities(a._1, b.relatedDataset + relatedDataset, b.relatedPublication + relatedPublication) } override def merge(b1: RelatedEntities, b2: RelatedEntities): RelatedEntities = { - if (b1!= null && b2!= null) - RelatedEntities(b1.id, b1.relatedDataset+ b2.relatedDataset, b1.relatedPublication+ b2.relatedPublication) + if (b1 != null && b2 != null) + RelatedEntities(b1.id, b1.relatedDataset + b2.relatedDataset, b1.relatedPublication + b2.relatedPublication) + else if (b1 != null) + b1 else - if (b1!= null) - b1 - else b2 } @@ -104,12 +100,12 @@ object ScholixUtils { override def zero: Scholix = null - def scholix_complete(s:Scholix):Boolean ={ - if (s== null || s.getIdentifier==null) { + def scholix_complete(s: Scholix): Boolean = { + if (s == null || s.getIdentifier == null) { false } else if (s.getSource == null || s.getTarget == null) { - false - } + false + } else if (s.getLinkprovider == null || s.getLinkprovider.isEmpty) false else @@ -121,7 +117,7 @@ object ScholixUtils { } override def merge(b1: Scholix, b2: Scholix): Scholix = { - if (scholix_complete(b1)) b1 else b2 + if (scholix_complete(b1)) b1 else b2 } override def finish(reduction: Scholix): Scholix = reduction @@ -132,7 +128,7 @@ object ScholixUtils { } - def createInverseScholixRelation(scholix: Scholix):Scholix = { + def createInverseScholixRelation(scholix: Scholix): Scholix = { val s = new Scholix s.setPublicationDate(scholix.getPublicationDate) s.setPublisher(scholix.getPublisher) @@ -144,34 +140,33 @@ object ScholixUtils { s - } - def extractCollectedFrom(summary:ScholixSummary): List[ScholixEntityId] = { - if (summary.getDatasources!= null && !summary.getDatasources.isEmpty) { - val l: List[ScholixEntityId] = summary.getDatasources.asScala.map{ + def extractCollectedFrom(summary: ScholixSummary): List[ScholixEntityId] = { + if (summary.getDatasources != null && !summary.getDatasources.isEmpty) { + val l: List[ScholixEntityId] = summary.getDatasources.asScala.map { d => new ScholixEntityId(d.getDatasourceName, List(new ScholixIdentifier(d.getDatasourceId, "DNET Identifier", null)).asJava) }(collection.breakOut) - l + l } else List() } - def extractCollectedFrom(relation: Relation) : List[ScholixEntityId] = { + def extractCollectedFrom(relation: Relation): List[ScholixEntityId] = { if (relation.getCollectedfrom != null && !relation.getCollectedfrom.isEmpty) { val l: List[ScholixEntityId] = relation.getCollectedfrom.asScala.map { c => - new ScholixEntityId(c.getValue, List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA,null)).asJava) + new ScholixEntityId(c.getValue, List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA, null)).asJava) }(collection breakOut) l } else List() } - def generateCompleteScholix(scholix: Scholix, target:ScholixSummary): Scholix = { + def generateCompleteScholix(scholix: Scholix, target: ScholixSummary): Scholix = { val s = new Scholix s.setPublicationDate(scholix.getPublicationDate) s.setPublisher(scholix.getPublisher) @@ -192,29 +187,28 @@ object ScholixUtils { r.setObjectType(summaryObject.getTypology.toString) r.setObjectSubType(summaryObject.getSubType) - if (summaryObject.getTitle!= null && !summaryObject.getTitle.isEmpty) - r.setTitle(summaryObject.getTitle.get(0)) + if (summaryObject.getTitle != null && !summaryObject.getTitle.isEmpty) + r.setTitle(summaryObject.getTitle.get(0)) - if (summaryObject.getAuthor!= null && !summaryObject.getAuthor.isEmpty){ - val l:List[ScholixEntityId] = summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a,null)).toList + if (summaryObject.getAuthor != null && !summaryObject.getAuthor.isEmpty) { + val l: List[ScholixEntityId] = summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a, null)).toList if (l.nonEmpty) r.setCreator(l.asJava) } - if (summaryObject.getDate!= null && !summaryObject.getDate.isEmpty) + if (summaryObject.getDate != null && !summaryObject.getDate.isEmpty) r.setPublicationDate(summaryObject.getDate.get(0)) - if (summaryObject.getPublisher!= null && !summaryObject.getPublisher.isEmpty) - { - val plist:List[ScholixEntityId] =summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList + if (summaryObject.getPublisher != null && !summaryObject.getPublisher.isEmpty) { + val plist: List[ScholixEntityId] = summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList if (plist.nonEmpty) r.setPublisher(plist.asJava) } - if (summaryObject.getDatasources!= null && !summaryObject.getDatasources.isEmpty) { + if (summaryObject.getDatasources != null && !summaryObject.getDatasources.isEmpty) { - val l:List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala.map(c => new ScholixCollectedFrom( + val l: List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala.map(c => new ScholixCollectedFrom( new ScholixEntityId(c.getDatasourceName, List(new ScholixIdentifier(c.getDatasourceId, DNET_IDENTIFIER_SCHEMA, null)).asJava) , "collected", "complete" @@ -228,12 +222,9 @@ object ScholixUtils { } + def scholixFromSource(relation: Relation, source: ScholixSummary): Scholix = { - - - def scholixFromSource(relation:Relation, source:ScholixSummary):Scholix = { - - if (relation== null || source== null) + if (relation == null || source == null) return null val s = new Scholix @@ -253,9 +244,9 @@ object ScholixUtils { s.setPublicationDate(d) - if (source.getPublisher!= null && !source.getPublisher.isEmpty) { + if (source.getPublisher != null && !source.getPublisher.isEmpty) { val l: List[ScholixEntityId] = source.getPublisher.asScala - .map{ + .map { p => new ScholixEntityId(p, null) }(collection.breakOut) @@ -265,7 +256,7 @@ object ScholixUtils { } val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null) - if (semanticRelation== null) + if (semanticRelation == null) return null s.setRelationship(new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse)) s.setSource(generateScholixResourceFromSummary(source)) @@ -274,8 +265,8 @@ object ScholixUtils { } - def findURLForPID(pidValue:List[StructuredProperty], urls:List[String]):List[(StructuredProperty, String)] = { - pidValue.map{ + def findURLForPID(pidValue: List[StructuredProperty], urls: List[String]): List[(StructuredProperty, String)] = { + pidValue.map { p => val pv = p.getValue @@ -285,67 +276,67 @@ object ScholixUtils { } - def extractTypedIdentifierFromInstance(r:Result):List[ScholixIdentifier] = { + def extractTypedIdentifierFromInstance(r: Result): List[ScholixIdentifier] = { if (r.getInstance() == null || r.getInstance().isEmpty) return List() - r.getInstance().asScala.filter(i => i.getUrl!= null && !i.getUrl.isEmpty) - .filter(i => i.getPid!= null && i.getUrl != null) + r.getInstance().asScala.filter(i => i.getUrl != null && !i.getUrl.isEmpty) + .filter(i => i.getPid != null && i.getUrl != null) .flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList)) .map(i => new ScholixIdentifier(i._1.getValue, i._1.getQualifier.getClassid, i._2)).distinct.toList } - def resultToSummary(r:Result):ScholixSummary = { + def resultToSummary(r: Result): ScholixSummary = { val s = new ScholixSummary s.setId(r.getId) if (r.getPid == null || r.getPid.isEmpty) return null - val persistentIdentifiers:List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r) + val persistentIdentifiers: List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r) if (persistentIdentifiers.isEmpty) return null s.setLocalIdentifier(persistentIdentifiers.asJava) - if (r.isInstanceOf[Publication] ) + if (r.isInstanceOf[Publication]) s.setTypology(Typology.publication) else s.setTypology(Typology.dataset) s.setSubType(r.getInstance().get(0).getInstancetype.getClassname) - if (r.getTitle!= null && r.getTitle.asScala.nonEmpty) { - val titles:List[String] =r.getTitle.asScala.map(t => t.getValue)(collection breakOut) + if (r.getTitle != null && r.getTitle.asScala.nonEmpty) { + val titles: List[String] = r.getTitle.asScala.map(t => t.getValue)(collection breakOut) if (titles.nonEmpty) s.setTitle(titles.asJava) else - return null + return null } - if(r.getAuthor!= null && !r.getAuthor.isEmpty) { - val authors:List[String] = r.getAuthor.asScala.map(a=> a.getFullname)(collection breakOut) + if (r.getAuthor != null && !r.getAuthor.isEmpty) { + val authors: List[String] = r.getAuthor.asScala.map(a => a.getFullname)(collection breakOut) if (authors nonEmpty) s.setAuthor(authors.asJava) } if (r.getInstance() != null) { - val dt:List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut) + val dt: List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut) if (dt.nonEmpty) s.setDate(dt.distinct.asJava) } - if (r.getDescription!= null && !r.getDescription.isEmpty) { - val d = r.getDescription.asScala.find(f => f!= null && f.getValue!=null) + if (r.getDescription != null && !r.getDescription.isEmpty) { + val d = r.getDescription.asScala.find(f => f != null && f.getValue != null) if (d.isDefined) s.setDescription(d.get.getValue) } - if (r.getSubject!= null && !r.getSubject.isEmpty) { - val subjects:List[SchemeValue] =r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut) + if (r.getSubject != null && !r.getSubject.isEmpty) { + val subjects: List[SchemeValue] = r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut) if (subjects.nonEmpty) s.setSubject(subjects.asJava) } - if (r.getPublisher!= null) + if (r.getPublisher != null) s.setPublisher(List(r.getPublisher.getValue).asJava) - if (r.getCollectedfrom!= null && !r.getCollectedfrom.isEmpty) { - val cf:List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut) + if (r.getCollectedfrom != null && !r.getCollectedfrom.isEmpty) { + val cf: List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut) if (cf.nonEmpty) s.setDatasources(cf.distinct.asJava) } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala similarity index 96% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala index a3a753a8a..7abce547f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -3,13 +3,9 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo} import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo -import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} -import javax.management.openmbean.OpenMBeanAttributeInfo import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} -import org.json4s import org.json4s.DefaultFormats -import eu.dnetlib.dhp.schema.common.ModelConstants import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.api.Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala similarity index 98% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 5b00e9b6f..0922f2e19 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -4,10 +4,9 @@ import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats -import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} -import org.junit.jupiter.api.Test -import org.junit.jupiter.api.Assertions._ import org.json4s.jackson.Serialization.write +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.Test class TestPreprocess extends java.io.Serializable{ diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala From e9f285ec4d0a8fc22fc19cfe5fdb916fafbec694 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 6 Dec 2021 14:24:03 +0100 Subject: [PATCH 075/176] [scala-refactor] Module dhp-doiboost: Moved all scala source into src/main/scala and src/test/scala --- .../doiboost/DoiBoostMappingUtil.scala | 14 ++- .../SparkGenerateDOIBoostActionSet.scala | 45 ++++------ .../doiboost/SparkGenerateDoiBoost.scala | 80 ++++++++--------- .../doiboost/crossref/Crossref2Oaf.scala | 9 +- .../doiboost/crossref/CrossrefDataset.scala | 25 +++--- .../crossref/GenerateCrossrefDataset.scala | 20 ++--- .../crossref/SparkMapDumpIntoOAF.scala | 4 +- .../crossref/UnpackCrtossrefEntries.scala | 9 +- .../dnetlib/doiboost/mag/MagDataModel.scala | 2 +- .../mag/SparkImportMagIntoDataset.scala | 7 +- .../doiboost/mag/SparkProcessMAG.scala | 22 ++--- .../dnetlib/doiboost/orcid/ORCIDToOAF.scala | 7 +- .../orcid/SparkConvertORCIDToOAF.scala | 8 +- .../doiboost/orcid/SparkPreprocessORCID.scala | 29 +++---- .../doiboost/uw/SparkMapUnpayWallToOAF.scala | 8 +- .../dnetlib/doiboost/uw/UnpayWallToOAF.scala | 3 +- .../doiboost/DoiBoostHostedByMapTest.scala | 70 --------------- .../doiboost/DoiBoostHostedByMapTest.scala | 20 +++++ .../dhp/doiboost/NormalizeDoiTest.scala | 0 .../crossref/CrossrefMappingTest.scala | 86 +++++-------------- .../dhp}/doiboost/mag/MAGMappingTest.scala | 11 +-- .../orcid/MappingORCIDToOAFTest.scala | 8 +- .../doiboost/uw/UnpayWallMappingTest.scala | 10 +-- 23 files changed, 181 insertions(+), 316 deletions(-) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala (97%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala (63%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala (76%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala (99%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala (77%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala (73%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala (96%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala (88%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/mag/MagDataModel.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala (98%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala (90%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala (98%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala (84%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala (67%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala (80%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala (98%) delete mode 100644 dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala create mode 100644 dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala rename dhp-workflows/dhp-doiboost/src/test/{java => scala}/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala (100%) rename dhp-workflows/dhp-doiboost/src/test/{java/eu/dnetlib => scala/eu/dnetlib/dhp}/doiboost/crossref/CrossrefMappingTest.scala (88%) rename dhp-workflows/dhp-doiboost/src/test/{java/eu/dnetlib => scala/eu/dnetlib/dhp}/doiboost/mag/MAGMappingTest.scala (88%) rename dhp-workflows/dhp-doiboost/src/test/{java/eu/dnetlib => scala/eu/dnetlib/dhp}/doiboost/orcid/MappingORCIDToOAFTest.scala (95%) rename dhp-workflows/dhp-doiboost/src/test/{java/eu/dnetlib => scala/eu/dnetlib/dhp}/doiboost/uw/UnpayWallMappingTest.scala (91%) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala similarity index 97% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index efd5d2497..3822f40b5 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -1,21 +1,19 @@ package eu.dnetlib.doiboost -import java.time.LocalDate -import java.time.format.DateTimeFormatter - +import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{AccessRight, DataInfo, Dataset, Field, Instance, KeyValue, Oaf, OpenAccessRoute, Organization, Publication, Qualifier, Relation, Result, StructuredProperty} +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf._ +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils import eu.dnetlib.dhp.utils.DHPUtils import org.apache.commons.lang3.StringUtils -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils -import eu.dnetlib.doiboost.DoiBoostMappingUtil.{getClosedAccessQualifier, getEmbargoedAccessQualifier, getUnknownQualifier} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} +import java.time.LocalDate +import java.time.format.DateTimeFormatter import scala.collection.JavaConverters._ diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala similarity index 63% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala index 3bfca0859..f13900abe 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala @@ -8,11 +8,12 @@ import org.apache.hadoop.io.Text import org.apache.hadoop.io.compress.GzipCodec import org.apache.hadoop.mapred.SequenceFileOutputFormat import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkGenerateDOIBoostActionSet { val logger: Logger = LoggerFactory.getLogger(getClass) + def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() @@ -33,53 +34,41 @@ object SparkGenerateDOIBoostActionSet { implicit val mapEncoderAtomiAction: Encoder[AtomicAction[OafDataset]] = Encoders.kryo[AtomicAction[OafDataset]] - val dbPublicationPath = parser.get("dbPublicationPath") - val dbDatasetPath = parser.get("dbDatasetPath") - val crossRefRelation = parser.get("crossRefRelation") - val dbaffiliationRelationPath = parser.get("dbaffiliationRelationPath") - val dbOrganizationPath = parser.get("dbOrganizationPath") - val sequenceFilePath = parser.get("sFilePath") + val dbPublicationPath = parser.get("dbPublicationPath") + val dbDatasetPath = parser.get("dbDatasetPath") + val crossRefRelation = parser.get("crossRefRelation") + val dbaffiliationRelationPath = parser.get("dbaffiliationRelationPath") + val dbOrganizationPath = parser.get("dbOrganizationPath") + val sequenceFilePath = parser.get("sFilePath") val asDataset = spark.read.load(dbDatasetPath).as[OafDataset] .filter(p => p != null || p.getId != null) - .map(d =>DoiBoostMappingUtil.fixResult(d)) - .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) + .map(d => DoiBoostMappingUtil.fixResult(d)) + .map(d => DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) - val asPublication =spark.read.load(dbPublicationPath).as[Publication] + val asPublication = spark.read.load(dbPublicationPath).as[Publication] .filter(p => p != null || p.getId != null) - .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) + .map(d => DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) val asOrganization = spark.read.load(dbOrganizationPath).as[Organization] - .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) - + .map(d => DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) val asCRelation = spark.read.load(crossRefRelation).as[Relation] - .filter(r => r!= null && r.getSource != null && r.getTarget != null) - .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) + .filter(r => r != null && r.getSource != null && r.getTarget != null) + .map(d => DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) val asRelAffiliation = spark.read.load(dbaffiliationRelationPath).as[Relation] - .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) - - - + .map(d => DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) val d: Dataset[(String, String)] = asDataset.union(asPublication).union(asOrganization).union(asCRelation).union(asRelAffiliation) - - d.rdd.repartition(6000).map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$sequenceFilePath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec]) - - - - - - - + d.rdd.repartition(6000).map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$sequenceFilePath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text, Text]], classOf[GzipCodec]) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala similarity index 76% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index 501073e74..91fe56cba 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -9,28 +9,26 @@ import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.JavaConverters._ +import org.apache.spark.sql._ import org.json4s.DefaultFormats -import org.json4s.JsonAST.{JField, JObject, JString,JArray} +import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.parse - +import org.slf4j.{Logger, LoggerFactory} +import scala.collection.JavaConverters._ object SparkGenerateDoiBoost { - def extractIdGRID(input:String):List[(String,String)] = { + def extractIdGRID(input: String): List[(String, String)] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: org.json4s.JValue = parse(input) - val id:String = (json \ "id").extract[String] + val id: String = (json \ "id").extract[String] - val grids:List[String] = for { + val grids: List[String] = for { JObject(pid) <- json \ "pid" JField("qualifier", JObject(qualifier)) <- pid - JField("classid", JString(classid)) <-qualifier + JField("classid", JString(classid)) <- qualifier JField("value", JString(vl)) <- pid if classid == "GRID" } yield vl @@ -38,7 +36,6 @@ object SparkGenerateDoiBoost { } - def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) @@ -73,7 +70,7 @@ object SparkGenerateDoiBoost { if (a != null && a._2 != null) { b.mergeFrom(a._2) b.setId(a._1) - val authors =AuthorMerger.mergeAuthor(b.getAuthor, a._2.getAuthor) + val authors = AuthorMerger.mergeAuthor(b.getAuthor, a._2.getAuthor) b.setAuthor(authors) return b } @@ -87,11 +84,11 @@ object SparkGenerateDoiBoost { return b2 } else { - if (b2 != null ) { + if (b2 != null) { b1.mergeFrom(b2) - val authors =AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor) + val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor) b1.setAuthor(authors) - if (b2.getId!= null && b2.getId.nonEmpty) + if (b2.getId != null && b2.getId.nonEmpty) b1.setId(b2.getId) return b1 } @@ -118,10 +115,9 @@ object SparkGenerateDoiBoost { val crossrefPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p)) val uwPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/uwPublication").as[Publication].map(p => (p.getId, p)) - def applyMerge(item:((String, Publication), (String, Publication))) : Publication = - { + def applyMerge(item: ((String, Publication), (String, Publication))): Publication = { val crossrefPub = item._1._2 - if (item._2!= null) { + if (item._2 != null) { val otherPub = item._2._2 if (otherPub != null) { crossrefPub.mergeFrom(otherPub) @@ -130,6 +126,7 @@ object SparkGenerateDoiBoost { } crossrefPub } + crossrefPublication.joinWith(uwPublication, crossrefPublication("_1").equalTo(uwPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/firstJoin") logger.info("Phase 3) Join Result with ORCID") val fj: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p)) @@ -143,9 +140,9 @@ object SparkGenerateDoiBoost { sj.joinWith(magPublication, sj("_1").equalTo(magPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublication") - val doiBoostPublication: Dataset[(String,Publication)] = spark.read.load(s"$workingDirPath/doiBoostPublication").as[Publication].filter(p=>DoiBoostMappingUtil.filterPublication(p)).map(DoiBoostMappingUtil.toISSNPair)(tupleForJoinEncoder) + val doiBoostPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/doiBoostPublication").as[Publication].filter(p => DoiBoostMappingUtil.filterPublication(p)).map(DoiBoostMappingUtil.toISSNPair)(tupleForJoinEncoder) - val hostedByDataset : Dataset[(String, HostedByItemType)] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath).map(DoiBoostMappingUtil.toHostedByItem)) + val hostedByDataset: Dataset[(String, HostedByItemType)] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath).map(DoiBoostMappingUtil.toHostedByItem)) doiBoostPublication.joinWith(hostedByDataset, doiBoostPublication("_1").equalTo(hostedByDataset("_1")), "left") @@ -164,21 +161,20 @@ object SparkGenerateDoiBoost { val paperAffiliation = spark.read.load(paperAffiliationPath).select(col("AffiliationId").alias("affId"), col("PaperId")) - val a:Dataset[DoiBoostAffiliation] = paperAffiliation + val a: Dataset[DoiBoostAffiliation] = paperAffiliation .joinWith(affiliation, paperAffiliation("affId").equalTo(affiliation("AffiliationId"))) .select(col("_1.PaperId"), col("_2.AffiliationId"), col("_2.GridId"), col("_2.OfficialPage"), col("_2.DisplayName")).as[DoiBoostAffiliation] - - val magPubs:Dataset[(String,Publication)]= spark.read.load(s"$workingDirPath/doiBoostPublicationFiltered").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p))(tupleForJoinEncoder).filter(s =>s._1!= null ) + val magPubs: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/doiBoostPublicationFiltered").as[Publication] + .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p))(tupleForJoinEncoder).filter(s => s._1 != null) - magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).flatMap(item => { - val pub:Publication = item._1._2 + magPubs.joinWith(a, magPubs("_1").equalTo(a("PaperId"))).flatMap(item => { + val pub: Publication = item._1._2 val affiliation = item._2 - val affId:String = if (affiliation.GridId.isDefined) s"unresolved::grid::${affiliation.GridId.get.toLowerCase}" else DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString) - val r:Relation = new Relation + val affId: String = if (affiliation.GridId.isDefined) s"unresolved::grid::${affiliation.GridId.get.toLowerCase}" else DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString) + val r: Relation = new Relation r.setSource(pub.getId) r.setTarget(affId) r.setRelType(ModelConstants.RESULT_ORGANIZATION) @@ -186,7 +182,7 @@ object SparkGenerateDoiBoost { r.setSubRelType(ModelConstants.AFFILIATION) r.setDataInfo(pub.getDataInfo) r.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) - val r1:Relation = new Relation + val r1: Relation = new Relation r1.setTarget(pub.getId) r1.setSource(affId) r1.setRelType(ModelConstants.RESULT_ORGANIZATION) @@ -198,33 +194,31 @@ object SparkGenerateDoiBoost { })(mapEncoderRel).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved") - - - val unresolvedRels:Dataset[(String, Relation)] = spark.read.load(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved").as[Relation].map(r => { + val unresolvedRels: Dataset[(String, Relation)] = spark.read.load(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved").as[Relation].map(r => { if (r.getSource.startsWith("unresolved")) (r.getSource, r) else if (r.getTarget.startsWith("unresolved")) - (r.getTarget,r) - else + (r.getTarget, r) + else ("resolved", r) })(Encoders.tuple(Encoders.STRING, mapEncoderRel)) - val openaireOrganization:Dataset[(String,String)] = spark.read.text(openaireOrganizationPath).as[String].flatMap(s => extractIdGRID(s)).groupByKey(_._2).reduceGroups((x,y) => if (x != null) x else y ).map(_._2) + val openaireOrganization: Dataset[(String, String)] = spark.read.text(openaireOrganizationPath).as[String].flatMap(s => extractIdGRID(s)).groupByKey(_._2).reduceGroups((x, y) => if (x != null) x else y).map(_._2) - unresolvedRels.joinWith(openaireOrganization,unresolvedRels("_1").equalTo(openaireOrganization("_2"))) + unresolvedRels.joinWith(openaireOrganization, unresolvedRels("_1").equalTo(openaireOrganization("_2"))) .map { x => val currentRels = x._1._2 val currentOrgs = x._2 - if (currentOrgs!= null) - if(currentRels.getSource.startsWith("unresolved")) + if (currentOrgs != null) + if (currentRels.getSource.startsWith("unresolved")) currentRels.setSource(currentOrgs._1) else currentRels.setTarget(currentOrgs._1) - currentRels - }.filter(r=> !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation") + currentRels + }.filter(r => !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation") - magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).map( item => { + magPubs.joinWith(a, magPubs("_1").equalTo(a("PaperId"))).map(item => { val affiliation = item._2 if (affiliation.GridId.isEmpty) { val o = new Organization @@ -241,7 +235,7 @@ object SparkGenerateDoiBoost { } else null - }).filter(o=> o!=null).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostOrganization") - } + }).filter(o => o != null).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostOrganization") + } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala similarity index 99% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 1b1c850ba..edca4a180 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -4,20 +4,19 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} import eu.dnetlib.dhp.utils.DHPUtils -import eu.dnetlib.doiboost.DoiBoostMappingUtil.{decideAccessRight, _} +import eu.dnetlib.doiboost.DoiBoostMappingUtil +import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import org.apache.commons.lang.StringUtils import org.json4s import org.json4s.DefaultFormats -import org.json4s.JsonAST.{JValue, _} +import org.json4s.JsonAST._ import org.json4s.jackson.JsonMethods._ import org.slf4j.{Logger, LoggerFactory} +import java.util import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex -import java.util - -import eu.dnetlib.doiboost.DoiBoostMappingUtil case class CrossrefDT(doi: String, json:String, timestamp: Long) {} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala similarity index 77% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala index 159b817c7..6a1c701af 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala @@ -6,7 +6,7 @@ import org.apache.commons.io.IOUtils import org.apache.hadoop.io.{IntWritable, Text} import org.apache.spark.SparkConf import org.apache.spark.sql.expressions.Aggregator -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.{Dataset, Encoder, SaveMode, SparkSession} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse @@ -17,12 +17,12 @@ object CrossrefDataset { val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) - def to_item(input:String):CrossrefDT = { + def to_item(input: String): CrossrefDT = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val ts:Long = (json \ "indexed" \ "timestamp").extract[Long] - val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) + val ts: Long = (json \ "indexed" \ "timestamp").extract[Long] + val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) CrossrefDT(doi, input, ts) } @@ -30,7 +30,6 @@ object CrossrefDataset { def main(args: Array[String]): Unit = { - val conf: SparkConf = new SparkConf() val parser = new ArgumentApplicationParser(IOUtils.toString(CrossrefDataset.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json"))) parser.parseArgument(args) @@ -54,7 +53,7 @@ object CrossrefDataset { return b - if(a.timestamp >b.timestamp) { + if (a.timestamp > b.timestamp) { return a } b @@ -66,7 +65,7 @@ object CrossrefDataset { if (a == null) return b - if(a.timestamp >b.timestamp) { + if (a.timestamp > b.timestamp) { return a } b @@ -79,20 +78,20 @@ object CrossrefDataset { override def finish(reduction: CrossrefDT): CrossrefDT = reduction } - val workingPath:String = parser.get("workingPath") + val workingPath: String = parser.get("workingPath") - val main_ds:Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT] + val main_ds: Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT] val update = - spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text]) - .map(i =>CrossrefImporter.decompressBlob(i._2.toString)) - .map(i =>to_item(i))) + spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text]) + .map(i => CrossrefImporter.decompressBlob(i._2.toString)) + .map(i => to_item(i))) main_ds.union(update).groupByKey(_.doi) .agg(crossrefAggregator.toColumn) - .map(s=>s._2) + .map(s => s._2) .write.mode(SaveMode.Overwrite).save(s"$workingPath/crossref_ds_updated") } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala similarity index 73% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala index 526ff7b3a..6d03abc25 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala @@ -2,17 +2,12 @@ package eu.dnetlib.doiboost.crossref import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.doiboost.DoiBoostMappingUtil -import eu.dnetlib.doiboost.crossref.CrossrefDataset.to_item -import eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries.getClass -import org.apache.hadoop.io.{IntWritable, Text} -import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.{SparkConf, SparkContext} import org.json4s import org.json4s.DefaultFormats -import org.json4s.JsonAST.JArray -import org.json4s.jackson.JsonMethods.{compact, parse, render} +import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} import scala.io.Source @@ -24,11 +19,10 @@ object GenerateCrossrefDataset { implicit val mrEncoder: Encoder[CrossrefDT] = Encoders.kryo[CrossrefDT] - def crossrefElement(meta: String): CrossrefDT = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(meta) - val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) + val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long] CrossrefDT(doi, meta, timestamp) @@ -51,14 +45,14 @@ object GenerateCrossrefDataset { import spark.implicits._ - val tmp : RDD[String] = sc.textFile(sourcePath,6000) + val tmp: RDD[String] = sc.textFile(sourcePath, 6000) spark.createDataset(tmp) .map(entry => crossrefElement(entry)) .write.mode(SaveMode.Overwrite).save(targetPath) -// .map(meta => crossrefElement(meta)) -// .toDS.as[CrossrefDT] -// .write.mode(SaveMode.Overwrite).save(targetPath) + // .map(meta => crossrefElement(meta)) + // .toDS.as[CrossrefDT] + // .write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala similarity index 96% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala index c65916610..fa55b9fb9 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala @@ -4,10 +4,8 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset} import org.apache.commons.io.IOUtils - import org.apache.spark.SparkConf - -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala similarity index 88% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala index 95ecb568b..191c4587e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala @@ -2,8 +2,8 @@ package eu.dnetlib.doiboost.crossref import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.hadoop.io.compress.GzipCodec +import org.apache.spark.sql.SparkSession import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.json4s import org.json4s.DefaultFormats import org.json4s.JsonAST.JArray @@ -17,9 +17,7 @@ object UnpackCrtossrefEntries { val log: Logger = LoggerFactory.getLogger(UnpackCrtossrefEntries.getClass) - - - def extractDump(input:String):List[String] = { + def extractDump(input: String): List[String] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) @@ -30,7 +28,6 @@ object UnpackCrtossrefEntries { } - def main(args: Array[String]): Unit = { val conf = new SparkConf val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json")).mkString) @@ -45,7 +42,7 @@ object UnpackCrtossrefEntries { .getOrCreate() val sc: SparkContext = spark.sparkContext - sc.wholeTextFiles(sourcePath,6000).flatMap(d =>extractDump(d._2)) + sc.wholeTextFiles(sourcePath, 6000).flatMap(d => extractDump(d._2)) .saveAsTextFile(targetPath, classOf[GzipCodec]) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala index fd9629024..0a6fa00f0 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala @@ -5,10 +5,10 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty} import eu.dnetlib.doiboost.DoiBoostMappingUtil +import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse -import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import scala.collection.JavaConverters._ import scala.collection.mutable diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala index a68d0bb2d..d25a4893f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala @@ -3,8 +3,8 @@ package eu.dnetlib.doiboost.mag import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{SaveMode, SparkSession} import org.apache.spark.sql.types._ +import org.apache.spark.sql.{SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkImportMagIntoDataset { @@ -24,13 +24,13 @@ object SparkImportMagIntoDataset { "Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Iso3166Code:string", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), "AuthorExtendedAttributes" -> Tuple2("mag/AuthorExtendedAttributes.txt", Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string")), "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), + "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), "ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), "EntityRelatedEntities" -> Tuple2("advanced/EntityRelatedEntities.txt", Seq("EntityId:long", "EntityType:string", "RelatedEntityId:long", "RelatedEntityType:string", "RelatedType:int", "Score:float")), "FieldOfStudyChildren" -> Tuple2("advanced/FieldOfStudyChildren.txt", Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")), "FieldOfStudyExtendedAttributes" -> Tuple2("advanced/FieldOfStudyExtendedAttributes.txt", Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string")), "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "CreatedDate:DateTime")), + "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), "PaperAbstractsInvertedIndex" -> Tuple2("nlp/PaperAbstractsInvertedIndex.txt.*", Seq("PaperId:long", "IndexedAbstract:string")), "PaperAuthorAffiliations" -> Tuple2("mag/PaperAuthorAffiliations.txt", Seq("PaperId:long", "AuthorId:long", "AffiliationId:long?", "AuthorSequenceNumber:uint", "OriginalAuthor:string", "OriginalAffiliation:string")), "PaperCitationContexts" -> Tuple2("nlp/PaperCitationContexts.txt", Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")), @@ -75,7 +75,6 @@ object SparkImportMagIntoDataset { .master(parser.get("master")).getOrCreate() - stream.foreach { case (k, v) => val s: StructType = getSchema(k) val df = spark.read diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala similarity index 90% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index 0ea4b66a4..41e95baa1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -5,22 +5,19 @@ import eu.dnetlib.dhp.schema.oaf.Publication import eu.dnetlib.doiboost.DoiBoostMappingUtil import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.functions._ +import org.apache.spark.sql.functions.{col, collect_list, struct} import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} - import scala.collection.JavaConverters._ - object SparkProcessMAG { - def getDistinctResults (d:Dataset[MagPapers]):Dataset[MagPapers]={ + def getDistinctResults(d: Dataset[MagPapers]): Dataset[MagPapers] = { d.where(col("Doi").isNotNull) .groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING) - .reduceGroups((p1:MagPapers,p2:MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1,p2)) + .reduceGroups((p1: MagPapers, p2: MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1, p2)) .map(_._2)(Encoders.product[MagPapers]) .map(mp => { - new MagPapers(mp.PaperId, mp.Rank, DoiBoostMappingUtil.normalizeDoi(mp.Doi), + MagPapers(mp.PaperId, mp.Rank, DoiBoostMappingUtil.normalizeDoi(mp.Doi), mp.DocType, mp.PaperTitle, mp.OriginalTitle, mp.BookTitle, mp.Year, mp.Date, mp.Publisher: String, mp.JournalId, mp.ConferenceSeriesId, mp.ConferenceInstanceId, @@ -98,13 +95,13 @@ object SparkProcessMAG { var magPubs: Dataset[(String, Publication)] = spark.read.load(s"$workingPath/merge_step_2").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] + .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] val conference = spark.read.load(s"$sourcePath/ConferenceInstances") - .select($"ConferenceInstanceId".as("ci"), $"DisplayName", $"Location", $"StartDate",$"EndDate" ) + .select($"ConferenceInstanceId".as("ci"), $"DisplayName", $"Location", $"StartDate", $"EndDate") val conferenceInstance = conference.joinWith(papers, papers("ConferenceInstanceId").equalTo(conference("ci"))) - .select($"_1.ci", $"_1.DisplayName", $"_1.Location", $"_1.StartDate",$"_1.EndDate", $"_2.PaperId").as[MagConferenceInstance] + .select($"_1.ci", $"_1.DisplayName", $"_1.Location", $"_1.StartDate", $"_1.EndDate", $"_2.PaperId").as[MagConferenceInstance] magPubs.joinWith(conferenceInstance, col("_1").equalTo(conferenceInstance("PaperId")), "left") @@ -122,7 +119,7 @@ object SparkProcessMAG { magPubs.joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left") .map(item => ConversionUtil.updatePubsWithDescription(item) - ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4") + ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4") logger.info("Phase 7) Enrich Publication with FieldOfStudy") @@ -148,11 +145,10 @@ object SparkProcessMAG { spark.read.load(s"$workingPath/mag_publication").as[Publication] .filter(p => p.getId != null) .groupByKey(p => p.getId) - .reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) + .reduceGroups((a: Publication, b: Publication) => ConversionUtil.mergePublication(a, b)) .map(_._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") - } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala index 1cd3f7028..11031f9ca 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala @@ -4,17 +4,16 @@ import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication} -import eu.dnetlib.dhp.schema.orcid.{AuthorData, OrcidDOI} import eu.dnetlib.doiboost.DoiBoostMappingUtil import eu.dnetlib.doiboost.DoiBoostMappingUtil.{createSP, generateDataInfo} import org.apache.commons.lang.StringUtils -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.JavaConverters._ import org.json4s import org.json4s.DefaultFormats import org.json4s.JsonAST._ import org.json4s.jackson.JsonMethods._ +import org.slf4j.{Logger, LoggerFactory} + +import scala.collection.JavaConverters._ case class ORCIDItem(doi:String, authors:List[OrcidAuthor]){} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala similarity index 84% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala index fa4a93e00..1b189e296 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala @@ -11,10 +11,10 @@ object SparkConvertORCIDToOAF { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - def run(spark:SparkSession, workingPath:String, targetPath:String) :Unit = { + def run(spark: SparkSession, workingPath: String, targetPath: String): Unit = { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] import spark.implicits._ - val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] + val dataset: Dataset[ORCIDItem] = spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] logger.info("Converting ORCID to OAF") dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) @@ -35,8 +35,8 @@ object SparkConvertORCIDToOAF { val workingPath = parser.get("workingPath") val targetPath = parser.get("targetPath") - run(spark,workingPath, targetPath) + run(spark, workingPath, targetPath) } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala similarity index 67% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala index 31f331912..153be5dd1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala @@ -1,48 +1,45 @@ package eu.dnetlib.doiboost.orcid -import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.merge.AuthorMerger import eu.dnetlib.dhp.schema.oaf.Publication -import eu.dnetlib.dhp.schema.orcid.OrcidDOI import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.functions.{col, collect_list} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkPreprocessORCID { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - def fixORCIDItem(item :ORCIDItem):ORCIDItem = { - ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) + def fixORCIDItem(item: ORCIDItem): ORCIDItem = { + ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) } - def run(spark:SparkSession,sourcePath:String,workingPath:String):Unit = { + def run(spark: SparkSession, sourcePath: String, workingPath: String): Unit = { import spark.implicits._ implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - val inputRDD:RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s!= null).filter(s => ORCIDToOAF.authorValid(s)) + val inputRDD: RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s != null).filter(s => ORCIDToOAF.authorValid(s)) spark.createDataset(inputRDD).as[OrcidAuthor].write.mode(SaveMode.Overwrite).save(s"$workingPath/author") - val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s!= null) + val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s != null) spark.createDataset(res).as[OrcidWork].write.mode(SaveMode.Overwrite).save(s"$workingPath/works") - val authors :Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] + val authors: Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] - val works :Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] + val works: Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] works.joinWith(authors, authors("oid").equalTo(works("oid"))) - .map(i =>{ + .map(i => { val doi = i._1.doi val author = i._2 - (doi, author) - }).groupBy(col("_1").alias("doi")) + (doi, author) + }).groupBy(col("_1").alias("doi")) .agg(collect_list(col("_2")).alias("authors")).as[ORCIDItem] .map(s => fixORCIDItem(s)) .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor") @@ -67,4 +64,4 @@ object SparkPreprocessORCID { } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala similarity index 80% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala index 4530926f1..70290018d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala @@ -1,16 +1,14 @@ package eu.dnetlib.doiboost.uw import eu.dnetlib.dhp.application.ArgumentApplicationParser - import eu.dnetlib.dhp.schema.oaf.Publication import eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} - object SparkMapUnpayWallToOAF { def main(args: Array[String]): Unit = { @@ -32,11 +30,11 @@ object SparkMapUnpayWallToOAF { val sourcePath = parser.get("sourcePath") val targetPath = parser.get("targetPath") - val inputRDD:RDD[String] = spark.sparkContext.textFile(s"$sourcePath") + val inputRDD: RDD[String] = spark.sparkContext.textFile(s"$sourcePath") logger.info("Converting UnpayWall to OAF") - val d:Dataset[Publication] = spark.createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p=>p!=null)).as[Publication] + val d: Dataset[Publication] = spark.createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p => p != null)).as[Publication] d.write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala index c8324cde1..bf5694965 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala @@ -4,14 +4,13 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{AccessRight, Instance, OpenAccessRoute, Publication} import eu.dnetlib.doiboost.DoiBoostMappingUtil +import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ -import eu.dnetlib.doiboost.DoiBoostMappingUtil._ -import eu.dnetlib.doiboost.uw.UnpayWallToOAF.get_unpaywall_color diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala deleted file mode 100644 index 4912648be..000000000 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala +++ /dev/null @@ -1,70 +0,0 @@ -package eu.dnetlib.dhp.doiboost - -import eu.dnetlib.dhp.schema.oaf.{Publication, Dataset => OafDataset} -import eu.dnetlib.doiboost.{DoiBoostMappingUtil, HostedByItemType} -import eu.dnetlib.doiboost.SparkGenerateDoiBoost.getClass -import eu.dnetlib.doiboost.mag.ConversionUtil -import eu.dnetlib.doiboost.orcid.ORCIDElement -import org.apache.spark.SparkConf -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} -import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig} -import org.junit.jupiter.api.Test - -import scala.io.Source - -class DoiBoostHostedByMapTest { - - - -// @Test -// def testMerge():Unit = { -// val conf: SparkConf = new SparkConf() -// val spark: SparkSession = -// SparkSession -// .builder() -// .config(conf) -// .appName(getClass.getSimpleName) -// .master("local[*]").getOrCreate() -// -// -// -// implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication] -// implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset] -// implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPub) -// -// -// import spark.implicits._ -// val dataset:RDD[String]= spark.sparkContext.textFile("/home/sandro/Downloads/hbMap.gz") -// -// -// val hbMap:Dataset[(String, HostedByItemType)] =spark.createDataset(dataset.map(DoiBoostMappingUtil.toHostedByItem)) -// -// -// hbMap.show() -// -// -// -// -// -// -// -// -// -// -// } - - - @Test - def idDSGeneration():Unit = { - val s ="doajarticles::0066-782X" - - - - println(DoiBoostMappingUtil.generateDSId(s)) - - - } - - -} diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala new file mode 100644 index 000000000..41730ade0 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala @@ -0,0 +1,20 @@ +package eu.dnetlib.dhp.doiboost + +import eu.dnetlib.doiboost.DoiBoostMappingUtil +import org.junit.jupiter.api.Test + +class DoiBoostHostedByMapTest { + + @Test + def idDSGeneration():Unit = { + val s ="doajarticles::0066-782X" + + + + println(DoiBoostMappingUtil.generateDSId(s)) + + + } + + +} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala similarity index 88% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index 5ef92cfa4..71dbf27be 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -1,7 +1,8 @@ -package eu.dnetlib.doiboost.crossref +package eu.dnetlib.dhp.doiboost.crossref import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.utils.DHPUtils +import eu.dnetlib.doiboost.crossref.Crossref2Oaf import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig} import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test @@ -21,9 +22,9 @@ class CrossrefMappingTest { @Test def testFunderRelationshipsMapping(): Unit = { - val template = Source.fromInputStream(getClass.getResourceAsStream("article_funder_template.json")).mkString - val funder_doi = Source.fromInputStream(getClass.getResourceAsStream("funder_doi")).mkString - val funder_name = Source.fromInputStream(getClass.getResourceAsStream("funder_doi")).mkString + val template = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article_funder_template.json")).mkString + val funder_doi = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/funder_doi")).mkString + val funder_name = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/funder_doi")).mkString for (line <- funder_doi.lines) { @@ -72,7 +73,7 @@ class CrossrefMappingTest { @Test def testOrcidID() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("orcid_data.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/orcid_data.json")).mkString assertNotNull(json) @@ -93,7 +94,7 @@ class CrossrefMappingTest { @Test def testEmptyTitle() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("empty_title.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/empty_title.json")).mkString assertNotNull(json) @@ -115,7 +116,7 @@ class CrossrefMappingTest { @Test def testPeerReviewed(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("prwTest.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/prwTest.json")).mkString mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) assertNotNull(json) @@ -156,7 +157,7 @@ class CrossrefMappingTest { @Test def testJournalRelation(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("awardTest.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/awardTest.json")).mkString assertNotNull(json) assertFalse(json.isEmpty) @@ -177,7 +178,7 @@ class CrossrefMappingTest { @Test def testConvertBookFromCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("book.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/book.json")).mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -233,7 +234,7 @@ class CrossrefMappingTest { @Test def testConvertPreprintFromCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("preprint.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/preprint.json")).mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -291,7 +292,7 @@ class CrossrefMappingTest { @Test def testConvertDatasetFromCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("dataset.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/dataset.json")).mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -332,7 +333,7 @@ class CrossrefMappingTest { @Test def testConvertArticleFromCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("article.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article.json")).mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -400,7 +401,7 @@ class CrossrefMappingTest { @Test def testSetDateOfAcceptanceCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("dump_file.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/dump_file.json")).mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -415,55 +416,12 @@ class CrossrefMappingTest { assert(items.size == 1) val result: Result = items.head.asInstanceOf[Publication] assertNotNull(result) - logger.info(mapper.writeValueAsString(result)); - -// assertNotNull(result.getDataInfo, "Datainfo test not null Failed"); -// assertNotNull( -// result.getDataInfo.getProvenanceaction, -// "DataInfo/Provenance test not null Failed"); -// assertFalse( -// result.getDataInfo.getProvenanceaction.getClassid.isEmpty, -// "DataInfo/Provenance/classId test not null Failed"); -// assertFalse( -// result.getDataInfo.getProvenanceaction.getClassname.isEmpty, -// "DataInfo/Provenance/className test not null Failed"); -// assertFalse( -// result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty, -// "DataInfo/Provenance/SchemeId test not null Failed"); -// assertFalse( -// result.getDataInfo.getProvenanceaction.getSchemename.isEmpty, -// "DataInfo/Provenance/SchemeName test not null Failed"); -// -// assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed"); -// assertFalse(result.getCollectedfrom.isEmpty); -// -// val collectedFromList = result.getCollectedfrom.asScala -// assert(collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion") -// -// assert(collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), "Wrong collected from assertion") -// -// -// val relevantDates = result.getRelevantdate.asScala -// -// assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), "Missing relevant date of type created") -// -// val rels = resultList.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]] -// assertFalse(rels.isEmpty) -// rels.foreach(relation => { -// assertNotNull(relation) -// assertFalse(relation.getSource.isEmpty) -// assertFalse(relation.getTarget.isEmpty) -// assertFalse(relation.getRelClass.isEmpty) -// assertFalse(relation.getRelType.isEmpty) -// assertFalse(relation.getSubRelType.isEmpty) -// -// }) } @Test def testNormalizeDOI(): Unit = { - val template = Source.fromInputStream(getClass.getResourceAsStream("article_funder_template.json")).mkString + val template = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article_funder_template.json")).mkString val line :String = "\"funder\": [{\"name\": \"Wellcome Trust Masters Fellowship\",\"award\": [\"090633\"]}]," val json = template.replace("%s", line) val resultList: List[Oaf] = Crossref2Oaf.convert(json) @@ -479,7 +437,7 @@ class CrossrefMappingTest { @Test def testNormalizeDOI2(): Unit = { - val template = Source.fromInputStream(getClass.getResourceAsStream("article.json")).mkString + val template = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article.json")).mkString val resultList: List[Oaf] = Crossref2Oaf.convert(template) assertTrue(resultList.nonEmpty) @@ -494,7 +452,7 @@ class CrossrefMappingTest { @Test def testLicenseVorClosed() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_vor.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_vor.json")).mkString assertNotNull(json) @@ -521,7 +479,7 @@ class CrossrefMappingTest { @Test def testLicenseOpen() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_open.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_open.json")).mkString assertNotNull(json) @@ -544,7 +502,7 @@ class CrossrefMappingTest { @Test def testLicenseEmbargoOpen() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo_open.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json")).mkString assertNotNull(json) @@ -567,7 +525,7 @@ class CrossrefMappingTest { @Test def testLicenseEmbargo() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_embargo.json")).mkString assertNotNull(json) @@ -591,7 +549,7 @@ class CrossrefMappingTest { @Test def testLicenseEmbargoDateTime() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo_datetime.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json")).mkString assertNotNull(json) @@ -614,7 +572,7 @@ class CrossrefMappingTest { @Test def testMultipleURLs() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("multiple_urls.json")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/multiple_urls.json")).mkString assertNotNull(json) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/mag/MAGMappingTest.scala similarity index 88% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/mag/MAGMappingTest.scala index 46d4ec08d..611f3b323 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/mag/MAGMappingTest.scala @@ -1,11 +1,12 @@ -package eu.dnetlib.doiboost.mag +package eu.dnetlib.dhp.doiboost.mag +import eu.dnetlib.doiboost.mag.{ConversionUtil, MagPapers, SparkProcessMAG} import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, SparkSession} import org.codehaus.jackson.map.ObjectMapper +import org.json4s.DefaultFormats import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test -import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} import java.sql.Timestamp @@ -47,7 +48,7 @@ class MAGMappingTest { @Test def buildInvertedIndexTest(): Unit = { - val json_input = Source.fromInputStream(getClass.getResourceAsStream("invertedIndex.json")).mkString + val json_input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/mag/invertedIndex.json")).mkString val description = ConversionUtil.convertInvertedIndexString(json_input) assertNotNull(description) assertTrue(description.nonEmpty) @@ -71,7 +72,7 @@ class MAGMappingTest { .appName(getClass.getSimpleName) .config(conf) .getOrCreate() - val path = getClass.getResource("magPapers.json").getPath + val path = getClass.getResource("/eu/dnetlib/doiboost/mag/magPapers.json").getPath import org.apache.spark.sql.Encoders val schema = Encoders.product[MagPapers].schema @@ -101,7 +102,7 @@ class MAGMappingTest { .appName(getClass.getSimpleName) .config(conf) .getOrCreate() - val path = getClass.getResource("duplicatedMagPapers.json").getPath + val path = getClass.getResource("/eu/dnetlib/doiboost/mag/duplicatedMagPapers.json").getPath import org.apache.spark.sql.Encoders val schema = Encoders.product[MagPapers].schema diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala similarity index 95% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala index b484dc087..7c8f01f81 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -1,7 +1,8 @@ -package eu.dnetlib.doiboost.orcid +package eu.dnetlib.dhp.doiboost.orcid import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.oaf.Publication +import eu.dnetlib.doiboost.orcid._ import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.junit.jupiter.api.Assertions._ @@ -10,9 +11,8 @@ import org.junit.jupiter.api.io.TempDir import org.slf4j.{Logger, LoggerFactory} import java.nio.file.Path -import scala.io.Source - import scala.collection.JavaConversions._ +import scala.io.Source class MappingORCIDToOAFTest { val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass) @@ -20,7 +20,7 @@ class MappingORCIDToOAFTest { @Test def testExtractData():Unit ={ - val json = Source.fromInputStream(getClass.getResourceAsStream("dataOutput")).mkString + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/orcid/dataOutput")).mkString assertNotNull(json) assertFalse(json.isEmpty) json.lines.foreach(s => { diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala similarity index 91% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala index fa696fffc..6671758b2 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala @@ -1,13 +1,13 @@ -package eu.dnetlib.doiboost.uw - +package eu.dnetlib.dhp.doiboost.uw import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.oaf.OpenAccessRoute +import eu.dnetlib.doiboost.uw.UnpayWallToOAF +import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test +import org.slf4j.{Logger, LoggerFactory} import scala.io.Source -import org.junit.jupiter.api.Assertions._ -import org.slf4j.{Logger, LoggerFactory} class UnpayWallMappingTest { @@ -18,7 +18,7 @@ class UnpayWallMappingTest { @Test def testMappingToOAF():Unit ={ - val Ilist = Source.fromInputStream(getClass.getResourceAsStream("input.json")).mkString + val Ilist = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/uw/input.json")).mkString var i:Int = 0 for (line <-Ilist.lines) { From 96a7d46278bb57f9b0324fda5ae6429a6fc47d7c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 6 Dec 2021 15:06:32 +0100 Subject: [PATCH 076/176] [Graph Dump] fixed tests --- .../dnetlib/dhp/common/GraphResultMapper.java | 826 +++++++++--------- .../dhp/oa/graph/dump/DumpJobTest.java | 34 +- .../addProjectInfo/publication_extendedmodel | 4 +- 3 files changed, 432 insertions(+), 432 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java index 8ceee5c8a..714fd21f6 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java @@ -1,413 +1,413 @@ - -package eu.dnetlib.dhp.common; - -import java.io.Serializable; -import java.util.*; -import java.util.stream.Collectors; - -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.dump.oaf.*; -import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance; -import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Journal; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; - -public class GraphResultMapper implements Serializable { - - public static Result map( - E in) { - - CommunityResult out = new CommunityResult(); - - eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in; - Optional ort = Optional.ofNullable(input.getResulttype()); - if (ort.isPresent()) { - switch (ort.get().getClassid()) { - case "publication": - Optional journal = Optional - .ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal()); - if (journal.isPresent()) { - Journal j = journal.get(); - Container c = new Container(); - c.setConferencedate(j.getConferencedate()); - c.setConferenceplace(j.getConferenceplace()); - c.setEdition(j.getEdition()); - c.setEp(j.getEp()); - c.setIss(j.getIss()); - c.setIssnLinking(j.getIssnLinking()); - c.setIssnOnline(j.getIssnOnline()); - c.setIssnPrinted(j.getIssnPrinted()); - c.setName(j.getName()); - c.setSp(j.getSp()); - c.setVol(j.getVol()); - out.setContainer(c); - out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname()); - } - break; - case "dataset": - eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input; - Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue())); - Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue())); - - out - .setGeolocation( - Optional - .ofNullable(id.getGeolocation()) - .map( - igl -> igl - .stream() - .filter(Objects::nonNull) - .map(gli -> { - GeoLocation gl = new GeoLocation(); - gl.setBox(gli.getBox()); - gl.setPlace(gli.getPlace()); - gl.setPoint(gli.getPoint()); - return gl; - }) - .collect(Collectors.toList())) - .orElse(null)); - - out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname()); - break; - case "software": - - eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input; - Optional - .ofNullable(is.getCodeRepositoryUrl()) - .ifPresent(value -> out.setCodeRepositoryUrl(value.getValue())); - Optional - .ofNullable(is.getDocumentationUrl()) - .ifPresent( - value -> out - .setDocumentationUrl( - value - .stream() - .map(Field::getValue) - .collect(Collectors.toList()))); - - Optional - .ofNullable(is.getProgrammingLanguage()) - .ifPresent(value -> out.setProgrammingLanguage(value.getClassid())); - - out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname()); - break; - case "other": - - eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input; - out - .setContactgroup( - Optional - .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - - out - .setContactperson( - Optional - .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - out - .setTool( - Optional - .ofNullable(ir.getTool()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - - out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); - - break; - } - - Optional - .ofNullable(input.getAuthor()) - .ifPresent( - ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList()))); - - // I do not map Access Right UNKNOWN or OTHER - - Optional oar = Optional.ofNullable(input.getBestaccessright()); - if (oar.isPresent()) { - if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) { - String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); - out - .setBestaccessright( - AccessRight - .newInstance( - code, - Constants.coarCodeLabelMap.get(code), - Constants.COAR_ACCESS_RIGHT_SCHEMA)); - } - } - - final List contributorList = new ArrayList<>(); - Optional - .ofNullable(input.getContributor()) - .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue()))); - out.setContributor(contributorList); - - Optional - .ofNullable(input.getCountry()) - .ifPresent( - value -> out - .setCountry( - value - .stream() - .map( - c -> { - if (c.getClassid().equals((ModelConstants.UNKNOWN))) { - return null; - } - Country country = new Country(); - country.setCode(c.getClassid()); - country.setLabel(c.getClassname()); - Optional - .ofNullable(c.getDataInfo()) - .ifPresent( - provenance -> country - .setProvenance( - Provenance - .newInstance( - provenance - .getProvenanceaction() - .getClassname(), - c.getDataInfo().getTrust()))); - return country; - }) - .filter(Objects::nonNull) - .collect(Collectors.toList()))); - - final List coverageList = new ArrayList<>(); - Optional - .ofNullable(input.getCoverage()) - .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue()))); - out.setCoverage(coverageList); - - out.setDateofcollection(input.getDateofcollection()); - - final List descriptionList = new ArrayList<>(); - Optional - .ofNullable(input.getDescription()) - .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue()))); - out.setDescription(descriptionList); - Optional> oStr = Optional.ofNullable(input.getEmbargoenddate()); - if (oStr.isPresent()) { - out.setEmbargoenddate(oStr.get().getValue()); - } - - final List formatList = new ArrayList<>(); - Optional - .ofNullable(input.getFormat()) - .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); - out.setFormat(formatList); - out.setId(input.getId()); - out.setOriginalId(input.getOriginalId()); - - Optional> oInst = Optional - .ofNullable(input.getInstance()); - - if (oInst.isPresent()) { - out - .setInstance( - oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList())); - - } - - Optional oL = Optional.ofNullable(input.getLanguage()); - if (oL.isPresent()) { - eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get(); - out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname())); - } - Optional oLong = Optional.ofNullable(input.getLastupdatetimestamp()); - if (oLong.isPresent()) { - out.setLastupdatetimestamp(oLong.get()); - } - Optional> otitle = Optional.ofNullable(input.getTitle()); - if (otitle.isPresent()) { - List iTitle = otitle - .get() - .stream() - .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) - .collect(Collectors.toList()); - if (!iTitle.isEmpty()) { - out.setMaintitle(iTitle.get(0).getValue()); - } - - iTitle = otitle - .get() - .stream() - .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) - .collect(Collectors.toList()); - if (!iTitle.isEmpty()) { - out.setSubtitle(iTitle.get(0).getValue()); - } - - } - - List pids = new ArrayList<>(); - Optional - .ofNullable(input.getPid()) - .ifPresent( - value -> value - .stream() - .forEach( - p -> pids - .add( - ControlledField - .newInstance(p.getQualifier().getClassid(), p.getValue())))); - out.setPid(pids); - oStr = Optional.ofNullable(input.getDateofacceptance()); - if (oStr.isPresent()) { - out.setPublicationdate(oStr.get().getValue()); - } - oStr = Optional.ofNullable(input.getPublisher()); - if (oStr.isPresent()) { - out.setPublisher(oStr.get().getValue()); - } - - List sourceList = new ArrayList<>(); - Optional - .ofNullable(input.getSource()) - .ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue()))); - // out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList())); - List subjectList = new ArrayList<>(); - Optional - .ofNullable(input.getSubject()) - .ifPresent( - value -> value - .forEach(s -> subjectList.add(getSubject(s)))); - - out.setSubjects(subjectList); - - out.setType(input.getResulttype().getClassid()); - } - - out - .setCollectedfrom( - input - .getCollectedfrom() - .stream() - .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue())) - .collect(Collectors.toList())); - - return out; - - } - - private static CommunityInstance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i) { - CommunityInstance instance = new CommunityInstance(); - - setCommonValue(i, instance); - - instance - .setCollectedfrom( - KeyValue - .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue())); - - instance - .setHostedby( - KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue())); - - return instance; - - } - - private static void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) { - Optional opAr = Optional - .ofNullable(i.getAccessright()); - if (opAr.isPresent()) { - if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) { - String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid()); - instance - .setAccessright( - AccessRight - .newInstance( - code, - Constants.coarCodeLabelMap.get(code), - Constants.COAR_ACCESS_RIGHT_SCHEMA)); - } - } - - Optional - .ofNullable(i.getLicense()) - .ifPresent(value -> instance.setLicense(value.getValue())); - Optional - .ofNullable(i.getDateofacceptance()) - .ifPresent(value -> instance.setPublicationdate(value.getValue())); - Optional - .ofNullable(i.getRefereed()) - .ifPresent(value -> instance.setRefereed(value.getClassname())); - Optional - .ofNullable(i.getInstancetype()) - .ifPresent(value -> instance.setType(value.getClassname())); - Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value)); - - } - - private static Subject getSubject(StructuredProperty s) { - Subject subject = new Subject(); - subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue())); - Optional di = Optional.ofNullable(s.getDataInfo()); - if (di.isPresent()) { - Provenance p = new Provenance(); - p.setProvenance(di.get().getProvenanceaction().getClassname()); - p.setTrust(di.get().getTrust()); - subject.setProvenance(p); - } - - return subject; - } - - private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) { - Author a = new Author(); - a.setFullname(oa.getFullname()); - a.setName(oa.getName()); - a.setSurname(oa.getSurname()); - a.setRank(oa.getRank()); - - Optional> oPids = Optional - .ofNullable(oa.getPid()); - if (oPids.isPresent()) { - Pid pid = getOrcid(oPids.get()); - if (pid != null) { - a.setPid(pid); - } - } - - return a; - } - - private static Pid getOrcid(List p) { - for (StructuredProperty pid : p) { - if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID)) { - Optional di = Optional.ofNullable(pid.getDataInfo()); - if (di.isPresent()) { - return Pid - .newInstance( - ControlledField - .newInstance( - pid.getQualifier().getClassid(), - pid.getValue()), - Provenance - .newInstance( - di.get().getProvenanceaction().getClassname(), - di.get().getTrust())); - } else { - return Pid - .newInstance( - ControlledField - .newInstance( - pid.getQualifier().getClassid(), - pid.getValue()) - - ); - } - - } - } - return null; - } - -} +// +//package eu.dnetlib.dhp.common; +// +//import java.io.Serializable; +//import java.util.*; +//import java.util.stream.Collectors; +// +//import eu.dnetlib.dhp.schema.common.ModelConstants; +//import eu.dnetlib.dhp.schema.dump.oaf.*; +//import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance; +//import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; +//import eu.dnetlib.dhp.schema.oaf.DataInfo; +//import eu.dnetlib.dhp.schema.oaf.Field; +//import eu.dnetlib.dhp.schema.oaf.Journal; +//import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +// +//public class GraphResultMapper implements Serializable { +// +// public static Result map( +// E in) { +// +// CommunityResult out = new CommunityResult(); +// +// eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in; +// Optional ort = Optional.ofNullable(input.getResulttype()); +// if (ort.isPresent()) { +// switch (ort.get().getClassid()) { +// case "publication": +// Optional journal = Optional +// .ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal()); +// if (journal.isPresent()) { +// Journal j = journal.get(); +// Container c = new Container(); +// c.setConferencedate(j.getConferencedate()); +// c.setConferenceplace(j.getConferenceplace()); +// c.setEdition(j.getEdition()); +// c.setEp(j.getEp()); +// c.setIss(j.getIss()); +// c.setIssnLinking(j.getIssnLinking()); +// c.setIssnOnline(j.getIssnOnline()); +// c.setIssnPrinted(j.getIssnPrinted()); +// c.setName(j.getName()); +// c.setSp(j.getSp()); +// c.setVol(j.getVol()); +// out.setContainer(c); +// out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname()); +// } +// break; +// case "dataset": +// eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input; +// Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue())); +// Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue())); +// +// out +// .setGeolocation( +// Optional +// .ofNullable(id.getGeolocation()) +// .map( +// igl -> igl +// .stream() +// .filter(Objects::nonNull) +// .map(gli -> { +// GeoLocation gl = new GeoLocation(); +// gl.setBox(gli.getBox()); +// gl.setPlace(gli.getPlace()); +// gl.setPoint(gli.getPoint()); +// return gl; +// }) +// .collect(Collectors.toList())) +// .orElse(null)); +// +// out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname()); +// break; +// case "software": +// +// eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input; +// Optional +// .ofNullable(is.getCodeRepositoryUrl()) +// .ifPresent(value -> out.setCodeRepositoryUrl(value.getValue())); +// Optional +// .ofNullable(is.getDocumentationUrl()) +// .ifPresent( +// value -> out +// .setDocumentationUrl( +// value +// .stream() +// .map(Field::getValue) +// .collect(Collectors.toList()))); +// +// Optional +// .ofNullable(is.getProgrammingLanguage()) +// .ifPresent(value -> out.setProgrammingLanguage(value.getClassid())); +// +// out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname()); +// break; +// case "other": +// +// eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input; +// out +// .setContactgroup( +// Optional +// .ofNullable(ir.getContactgroup()) +// .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) +// .orElse(null)); +// +// out +// .setContactperson( +// Optional +// .ofNullable(ir.getContactperson()) +// .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) +// .orElse(null)); +// out +// .setTool( +// Optional +// .ofNullable(ir.getTool()) +// .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) +// .orElse(null)); +// +// out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); +// +// break; +// } +// +// Optional +// .ofNullable(input.getAuthor()) +// .ifPresent( +// ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList()))); +// +// // I do not map Access Right UNKNOWN or OTHER +// +// Optional oar = Optional.ofNullable(input.getBestaccessright()); +// if (oar.isPresent()) { +// if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) { +// String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); +// out +// .setBestaccessright( +// AccessRight +// .newInstance( +// code, +// Constants.coarCodeLabelMap.get(code), +// Constants.COAR_ACCESS_RIGHT_SCHEMA)); +// } +// } +// +// final List contributorList = new ArrayList<>(); +// Optional +// .ofNullable(input.getContributor()) +// .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue()))); +// out.setContributor(contributorList); +// +// Optional +// .ofNullable(input.getCountry()) +// .ifPresent( +// value -> out +// .setCountry( +// value +// .stream() +// .map( +// c -> { +// if (c.getClassid().equals((ModelConstants.UNKNOWN))) { +// return null; +// } +// Country country = new Country(); +// country.setCode(c.getClassid()); +// country.setLabel(c.getClassname()); +// Optional +// .ofNullable(c.getDataInfo()) +// .ifPresent( +// provenance -> country +// .setProvenance( +// Provenance +// .newInstance( +// provenance +// .getProvenanceaction() +// .getClassname(), +// c.getDataInfo().getTrust()))); +// return country; +// }) +// .filter(Objects::nonNull) +// .collect(Collectors.toList()))); +// +// final List coverageList = new ArrayList<>(); +// Optional +// .ofNullable(input.getCoverage()) +// .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue()))); +// out.setCoverage(coverageList); +// +// out.setDateofcollection(input.getDateofcollection()); +// +// final List descriptionList = new ArrayList<>(); +// Optional +// .ofNullable(input.getDescription()) +// .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue()))); +// out.setDescription(descriptionList); +// Optional> oStr = Optional.ofNullable(input.getEmbargoenddate()); +// if (oStr.isPresent()) { +// out.setEmbargoenddate(oStr.get().getValue()); +// } +// +// final List formatList = new ArrayList<>(); +// Optional +// .ofNullable(input.getFormat()) +// .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); +// out.setFormat(formatList); +// out.setId(input.getId()); +// out.setOriginalId(input.getOriginalId()); +// +// Optional> oInst = Optional +// .ofNullable(input.getInstance()); +// +// if (oInst.isPresent()) { +// out +// .setInstance( +// oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList())); +// +// } +// +// Optional oL = Optional.ofNullable(input.getLanguage()); +// if (oL.isPresent()) { +// eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get(); +// out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname())); +// } +// Optional oLong = Optional.ofNullable(input.getLastupdatetimestamp()); +// if (oLong.isPresent()) { +// out.setLastupdatetimestamp(oLong.get()); +// } +// Optional> otitle = Optional.ofNullable(input.getTitle()); +// if (otitle.isPresent()) { +// List iTitle = otitle +// .get() +// .stream() +// .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) +// .collect(Collectors.toList()); +// if (!iTitle.isEmpty()) { +// out.setMaintitle(iTitle.get(0).getValue()); +// } +// +// iTitle = otitle +// .get() +// .stream() +// .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) +// .collect(Collectors.toList()); +// if (!iTitle.isEmpty()) { +// out.setSubtitle(iTitle.get(0).getValue()); +// } +// +// } +// +// List pids = new ArrayList<>(); +// Optional +// .ofNullable(input.getPid()) +// .ifPresent( +// value -> value +// .stream() +// .forEach( +// p -> pids +// .add( +// ControlledField +// .newInstance(p.getQualifier().getClassid(), p.getValue())))); +// out.setPid(pids); +// oStr = Optional.ofNullable(input.getDateofacceptance()); +// if (oStr.isPresent()) { +// out.setPublicationdate(oStr.get().getValue()); +// } +// oStr = Optional.ofNullable(input.getPublisher()); +// if (oStr.isPresent()) { +// out.setPublisher(oStr.get().getValue()); +// } +// +// List sourceList = new ArrayList<>(); +// Optional +// .ofNullable(input.getSource()) +// .ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue()))); +// // out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList())); +// List subjectList = new ArrayList<>(); +// Optional +// .ofNullable(input.getSubject()) +// .ifPresent( +// value -> value +// .forEach(s -> subjectList.add(getSubject(s)))); +// +// out.setSubjects(subjectList); +// +// out.setType(input.getResulttype().getClassid()); +// } +// +// out +// .setCollectedfrom( +// input +// .getCollectedfrom() +// .stream() +// .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue())) +// .collect(Collectors.toList())); +// +// return out; +// +// } +// +// private static CommunityInstance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i) { +// CommunityInstance instance = new CommunityInstance(); +// +// setCommonValue(i, instance); +// +// instance +// .setCollectedfrom( +// KeyValue +// .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue())); +// +// instance +// .setHostedby( +// KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue())); +// +// return instance; +// +// } +// +// private static void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) { +// Optional opAr = Optional +// .ofNullable(i.getAccessright()); +// if (opAr.isPresent()) { +// if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) { +// String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid()); +// instance +// .setAccessright( +// AccessRight +// .newInstance( +// code, +// Constants.coarCodeLabelMap.get(code), +// Constants.COAR_ACCESS_RIGHT_SCHEMA)); +// } +// } +// +// Optional +// .ofNullable(i.getLicense()) +// .ifPresent(value -> instance.setLicense(value.getValue())); +// Optional +// .ofNullable(i.getDateofacceptance()) +// .ifPresent(value -> instance.setPublicationdate(value.getValue())); +// Optional +// .ofNullable(i.getRefereed()) +// .ifPresent(value -> instance.setRefereed(value.getClassname())); +// Optional +// .ofNullable(i.getInstancetype()) +// .ifPresent(value -> instance.setType(value.getClassname())); +// Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value)); +// +// } +// +// private static Subject getSubject(StructuredProperty s) { +// Subject subject = new Subject(); +// subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue())); +// Optional di = Optional.ofNullable(s.getDataInfo()); +// if (di.isPresent()) { +// Provenance p = new Provenance(); +// p.setProvenance(di.get().getProvenanceaction().getClassname()); +// p.setTrust(di.get().getTrust()); +// subject.setProvenance(p); +// } +// +// return subject; +// } +// +// private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) { +// Author a = new Author(); +// a.setFullname(oa.getFullname()); +// a.setName(oa.getName()); +// a.setSurname(oa.getSurname()); +// a.setRank(oa.getRank()); +// +// Optional> oPids = Optional +// .ofNullable(oa.getPid()); +// if (oPids.isPresent()) { +// Pid pid = getOrcid(oPids.get()); +// if (pid != null) { +// a.setPid(pid); +// } +// } +// +// return a; +// } +// +// private static Pid getOrcid(List p) { +// for (StructuredProperty pid : p) { +// if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID)) { +// Optional di = Optional.ofNullable(pid.getDataInfo()); +// if (di.isPresent()) { +// return Pid +// .newInstance( +// ControlledField +// .newInstance( +// pid.getQualifier().getClassid(), +// pid.getValue()), +// Provenance +// .newInstance( +// di.get().getProvenanceaction().getClassname(), +// di.get().getTrust())); +// } else { +// return Pid +// .newInstance( +// ControlledField +// .newInstance( +// pid.getQualifier().getClassid(), +// pid.getValue()) +// +// ); +// } +// +// } +// } +// return null; +// } +// +//} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 602aaf6e6..b65c17ac9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -171,23 +171,23 @@ public class DumpJobTest { GraphResult gr = verificationDataset.first(); - Assertions.assertEquals(2, gr.getMeasures().size()); - Assertions - .assertTrue( - gr - .getMeasures() - .stream() - .anyMatch( - m -> m.getKey().equals("influence") - && m.getValue().equals("1.62759106106e-08"))); - Assertions - .assertTrue( - gr - .getMeasures() - .stream() - .anyMatch( - m -> m.getKey().equals("popularity") - && m.getValue().equals("0.22519296"))); +// Assertions.assertEquals(2, gr.getMeasures().size()); +// Assertions +// .assertTrue( +// gr +// .getMeasures() +// .stream() +// .anyMatch( +// m -> m.getKey().equals("influence") +// && m.getValue().equals("1.62759106106e-08"))); +// Assertions +// .assertTrue( +// gr +// .getMeasures() +// .stream() +// .anyMatch( +// m -> m.getKey().equals("popularity") +// && m.getValue().equals("0.22519296"))); Assertions.assertEquals(6, gr.getAuthor().size()); Assertions diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel index 6b146405a..b56b30fa5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel @@ -1,2 +1,2 @@ -{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":null},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} -{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":null},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} \ No newline at end of file +{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} +{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} \ No newline at end of file From ed0c3527997fa3770df347000eaa352cdcbcf26d Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 6 Dec 2021 15:07:41 +0100 Subject: [PATCH 077/176] [test-fixing] fixed wrong test --- .../dnetlib/dhp/oa/graph/dump/DumpJobTest.java | 18 ------------------ .../sx/graph/scholix/ScholixGraphTest.scala | 3 ++- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 602aaf6e6..07f663896 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -171,24 +171,6 @@ public class DumpJobTest { GraphResult gr = verificationDataset.first(); - Assertions.assertEquals(2, gr.getMeasures().size()); - Assertions - .assertTrue( - gr - .getMeasures() - .stream() - .anyMatch( - m -> m.getKey().equals("influence") - && m.getValue().equals("1.62759106106e-08"))); - Assertions - .assertTrue( - gr - .getMeasures() - .stream() - .anyMatch( - m -> m.getKey().equals("popularity") - && m.getValue().equals("0.22519296"))); - Assertions.assertEquals(6, gr.getAuthor().size()); Assertions .assertTrue( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala index bd7e4fd09..04b1f9ecd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala @@ -37,7 +37,8 @@ class ScholixGraphTest extends AbstractVocabularyTest{ val input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/scholix/result.json")).mkString val res =SparkResolveRelation.extractPidsFromRecord(input) assertNotNull(res) - assertTrue(res._2.size == 2) + + assertEquals(1,res._2.size) } From d1df01ff1e9aa99b9412cf0bbe87b239e058e44c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 6 Dec 2021 15:15:48 +0100 Subject: [PATCH 078/176] [Graph Dump] fixed resource for test --- .../oa/graph/dump/addProjectInfo/publication_extendedmodel | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel index 6b146405a..b56b30fa5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel @@ -1,2 +1,2 @@ -{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":null},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} -{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":null},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} \ No newline at end of file +{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} +{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} \ No newline at end of file From d9836f0cf3fd5cd766d3a92e09efd8a533c69efd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 6 Dec 2021 15:27:09 +0100 Subject: [PATCH 079/176] [OpenCitations] fixed test when executed one after the other --- .../CreateOpenCitationsASTest.java | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java index 5a04dcefe..5153c412f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java @@ -89,13 +89,13 @@ public class CreateOpenCitationsASTest { "-inputPath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet" + workingDir.toString() + "/actionSet1" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .sequenceFile(workingDir.toString() + "/actionSet1", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); @@ -121,13 +121,13 @@ public class CreateOpenCitationsASTest { "-inputPath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet" + workingDir.toString() + "/actionSet2" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .sequenceFile(workingDir.toString() + "/actionSet2", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); @@ -153,13 +153,13 @@ public class CreateOpenCitationsASTest { "-inputPath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet" + workingDir.toString() + "/actionSet3" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .sequenceFile(workingDir.toString() + "/actionSet3", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); @@ -186,13 +186,13 @@ public class CreateOpenCitationsASTest { "-inputPath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet" + workingDir.toString() + "/actionSet4" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .sequenceFile(workingDir.toString() + "/actionSet4", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); @@ -226,13 +226,13 @@ public class CreateOpenCitationsASTest { "-inputPath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet" + workingDir.toString() + "/actionSet5" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .sequenceFile(workingDir.toString() + "/actionSet5", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); @@ -261,13 +261,13 @@ public class CreateOpenCitationsASTest { "-inputPath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet" + workingDir.toString() + "/actionSet6" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .sequenceFile(workingDir.toString() + "/actionSet6", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); @@ -306,13 +306,13 @@ public class CreateOpenCitationsASTest { "-inputPath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet" + workingDir.toString() + "/actionSet7" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .sequenceFile(workingDir.toString() + "/actionSet7", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); From 6b5d7688a4b298218526218984d4845b357f008d Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Thu, 9 Dec 2021 13:46:48 +0100 Subject: [PATCH 080/176] #7275 serialize license information in XML records --- .../dhp/oa/provision/utils/XmlRecordFactory.java | 11 +++++++++++ .../eu/dnetlib/dhp/oa/provision/publication.json | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 6926f90d0..a6924b7d5 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1212,6 +1212,17 @@ public class XmlRecordFactory implements Serializable { "processingchargecurrency", instance.getProcessingchargecurrency())); } + if (instance.getLicense() != null) { + fields + .addAll( + instance + .getLicense() + .stream() + .filter(d -> isNotBlank(d)) + .map(d -> XmlSerializationUtils.asXmlElement("license", d)) + .collect(Collectors.toList())); + } + children .add( templateFactory diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json index 9794fd956..d5aa13ed6 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json @@ -541,7 +541,7 @@ }, "trust": "" }, - "value": "" + "value": "CC-BY" }, "url": [ "http://dx.doi.org/10.1109/TED.2018.2853550" From e6e177dda0c06f877673b78baf25b3cc9cb390c3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 9 Dec 2021 13:57:53 +0100 Subject: [PATCH 081/176] vocabulary based cleaning considers also the term label when looking up for a synonym --- .../common/vocabulary/VocabularyGroup.java | 8 ++ .../clean/GraphCleaningFunctionsTest.java | 7 +- .../eu/dnetlib/dhp/oa/graph/clean/result.json | 86 +++++++++++++++++++ 3 files changed, 99 insertions(+), 2 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java index d5f57849c..1c129ff9c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java @@ -57,9 +57,17 @@ public class VocabularyGroup implements Serializable { final String syn = arr[2].trim(); vocs.addSynonyms(vocId, termId, syn); + } } + // add the term names as synonyms + vocs.vocs.values().forEach(voc -> { + voc.getTerms().values().forEach(term -> { + voc.addSynonym(term.getName().toLowerCase(), term.getId()); + }); + }); + return vocs; } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index 0264a6266..5ceb644c9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -151,6 +151,9 @@ public class GraphCleaningFunctionsTest { assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid()); assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname()); + assertEquals("0033", p_out.getInstance().get(1).getInstancetype().getClassid()); + assertEquals("Audiovisual", p_out.getInstance().get(1).getInstancetype().getClassname()); + assertEquals("CLOSED", p_out.getInstance().get(0).getAccessright().getClassid()); assertEquals("Closed Access", p_out.getInstance().get(0).getAccessright().getClassname()); @@ -164,7 +167,7 @@ public class GraphCleaningFunctionsTest { List poi = p_out.getInstance(); assertNotNull(poi); - assertEquals(1, poi.size()); + assertEquals(2, poi.size()); final Instance poii = poi.get(0); assertNotNull(poii); @@ -213,7 +216,7 @@ public class GraphCleaningFunctionsTest { final List pci = p_cleaned.getInstance(); assertNotNull(pci); - assertEquals(1, pci.size()); + assertEquals(2, pci.size()); final Instance pcii = pci.get(0); assertNotNull(pcii); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json index b3e302474..5b9e86c65 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json @@ -403,6 +403,92 @@ "http://juuli.fi/Record/0275158616", "http://dx.doi.org/10.1007/s109090161569x" ] + }, + { + "pid": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1002/s21010127267xy" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1008/abcd" + } + ], + "alternateIdentifier": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1009/qwerty" + } + ], + "accessright": { + "classid": "CLOSED", + "classname": "CLOSED", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "2016-01-01" + }, + "distributionlocation": "", + "hostedby": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "Audiovisual", + "classname": "Audiovisual", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "url": [ + "http://dx.doi.org/10.1002/s21010127267xy" + ] } ], "journal": { From e53228401b1697b7a3ac1546188beb3e183efa61 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Thu, 9 Dec 2021 15:46:22 +0100 Subject: [PATCH 082/176] style --- .../dhp/oa/provision/utils/XmlRecordFactory.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index a6924b7d5..d97151ab2 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1214,13 +1214,13 @@ public class XmlRecordFactory implements Serializable { if (instance.getLicense() != null) { fields - .addAll( - instance - .getLicense() - .stream() - .filter(d -> isNotBlank(d)) - .map(d -> XmlSerializationUtils.asXmlElement("license", d)) - .collect(Collectors.toList())); + .addAll( + instance + .getLicense() + .stream() + .filter(d -> isNotBlank(d)) + .map(d -> XmlSerializationUtils.asXmlElement("license", d)) + .collect(Collectors.toList())); } children From 41c70c607dc7fd8786da3711a52477907e5a4d1a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 9 Dec 2021 16:44:28 +0100 Subject: [PATCH 083/176] cleaning workflow assigns the proper default instance type when a value could not be cleaned using the vocabularies --- .../oaf/utils/GraphCleaningFunctions.java | 36 +++++++- .../oa/graph/clean/CleanGraphSparkJob.java | 2 +- .../clean/GraphCleaningFunctionsTest.java | 20 +++-- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 2 +- .../eu/dnetlib/dhp/oa/graph/clean/result.json | 90 ++++++++++++++++++- 5 files changed, 138 insertions(+), 12 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index f5781390a..42e19628c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -16,6 +16,8 @@ import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import eu.dnetlib.dhp.common.vocabulary.Vocabulary; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -131,7 +133,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { return true; } - public static T cleanup(T value) { + public static T cleanup(T value, VocabularyGroup vocs) { if (value instanceof Datasource) { // nothing to clean here } else if (value instanceof Project) { @@ -250,6 +252,38 @@ public class GraphCleaningFunctions extends CleaningFunctions { if (Objects.nonNull(r.getInstance())) { for (Instance i : r.getInstance()) { + if (!vocs.termExists(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getClassid())) { + if (r instanceof Publication) { + i + .setInstancetype( + OafMapperUtils + .qualifier( + "0038", "Other literature type", ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE)); + } else if (r instanceof Dataset) { + i + .setInstancetype( + OafMapperUtils + .qualifier( + "0039", "Other dataset type", ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE)); + } else if (r instanceof Software) { + i + .setInstancetype( + OafMapperUtils + .qualifier( + "0040", "Other software type", ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE)); + } else if (r instanceof OtherResearchProduct) { + i + .setInstancetype( + OafMapperUtils + .qualifier( + "0020", "Other ORP type", ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE)); + } + } + if (Objects.nonNull(i.getPid())) { i.setPid(processPidCleaning(i.getPid())); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java index d43d7ce28..2e2ea567a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java @@ -88,7 +88,7 @@ public class CleanGraphSparkJob { readTableFromPath(spark, inputPath, clazz) .map((MapFunction) GraphCleaningFunctions::fixVocabularyNames, Encoders.bean(clazz)) .map((MapFunction) value -> OafCleaner.apply(value, mapping), Encoders.bean(clazz)) - .map((MapFunction) GraphCleaningFunctions::cleanup, Encoders.bean(clazz)) + .map((MapFunction) value -> GraphCleaningFunctions.cleanup(value, vocs), Encoders.bean(clazz)) .filter((FilterFunction) GraphCleaningFunctions::filter) .write() .mode(SaveMode.Overwrite) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index 5ceb644c9..a7c7eb810 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -151,8 +151,11 @@ public class GraphCleaningFunctionsTest { assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid()); assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname()); - assertEquals("0033", p_out.getInstance().get(1).getInstancetype().getClassid()); - assertEquals("Audiovisual", p_out.getInstance().get(1).getInstancetype().getClassname()); + assertEquals("0027", p_out.getInstance().get(1).getInstancetype().getClassid()); + assertEquals("Model", p_out.getInstance().get(1).getInstancetype().getClassname()); + + assertEquals("xyz", p_out.getInstance().get(2).getInstancetype().getClassid()); + assertEquals("xyz", p_out.getInstance().get(2).getInstancetype().getClassname()); assertEquals("CLOSED", p_out.getInstance().get(0).getAccessright().getClassid()); assertEquals("Closed Access", p_out.getInstance().get(0).getAccessright().getClassname()); @@ -167,7 +170,7 @@ public class GraphCleaningFunctionsTest { List poi = p_out.getInstance(); assertNotNull(poi); - assertEquals(2, poi.size()); + assertEquals(3, poi.size()); final Instance poii = poi.get(0); assertNotNull(poii); @@ -195,7 +198,7 @@ public class GraphCleaningFunctionsTest { assertEquals(5, p_out.getTitle().size()); - Publication p_cleaned = GraphCleaningFunctions.cleanup(p_out); + Publication p_cleaned = GraphCleaningFunctions.cleanup(p_out, vocabularies); assertEquals(3, p_cleaned.getTitle().size()); @@ -214,9 +217,12 @@ public class GraphCleaningFunctionsTest { assertEquals("1970-10-07", p_cleaned.getDateofacceptance().getValue()); + assertEquals("0038", p_cleaned.getInstance().get(2).getInstancetype().getClassid()); + assertEquals("Other literature type", p_cleaned.getInstance().get(2).getInstancetype().getClassname()); + final List pci = p_cleaned.getInstance(); assertNotNull(pci); - assertEquals(2, pci.size()); + assertEquals(3, pci.size()); final Instance pcii = pci.get(0); assertNotNull(pcii); @@ -284,7 +290,7 @@ public class GraphCleaningFunctionsTest { .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json")); Publication p_in = MAPPER.readValue(json, Publication.class); Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping); - Publication cleaned = GraphCleaningFunctions.cleanup(p_out); + Publication cleaned = GraphCleaningFunctions.cleanup(p_out, vocabularies); Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned)); } @@ -295,7 +301,7 @@ public class GraphCleaningFunctionsTest { .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json")); Publication p_in = MAPPER.readValue(json, Publication.class); Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping); - Publication cleaned = GraphCleaningFunctions.cleanup(p_out); + Publication cleaned = GraphCleaningFunctions.cleanup(p_out, vocabularies); Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned)); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 27e33bf27..de79b750a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -708,7 +708,7 @@ class MappersTest { assertEquals(1, p.getTitle().size()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); - final Publication p_cleaned = cleanup(fixVocabularyNames(p)); + final Publication p_cleaned = cleanup(fixVocabularyNames(p), vocs); assertNotNull(p_cleaned.getTitle()); assertFalse(p_cleaned.getTitle().isEmpty()); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json index 5b9e86c65..78fdc4c9d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json @@ -481,14 +481,100 @@ "value": "VIRTA" }, "instancetype": { - "classid": "Audiovisual", - "classname": "Audiovisual", + "classid": "Model", + "classname": "Model", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource" }, "url": [ "http://dx.doi.org/10.1002/s21010127267xy" ] + }, + { + "pid": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1002/s21010127267xy" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1008/abcd" + } + ], + "alternateIdentifier": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1009/qwerty" + } + ], + "accessright": { + "classid": "CLOSED", + "classname": "CLOSED", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "2016-01-01" + }, + "distributionlocation": "", + "hostedby": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "xyz", + "classname": "xyz", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "url": [ + "http://dx.doi.org/10.1002/t32121238378t" + ] } ], "journal": { From b70ecccea07c239be0f7dd3ec5a63deb4e32f44a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Sun, 12 Dec 2021 12:37:38 +0100 Subject: [PATCH 084/176] avoid NPEs merging XMLInstance(s) --- .../java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index d97151ab2..66827af67 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1293,6 +1293,7 @@ public class XmlRecordFactory implements Serializable { .collect(Collectors.groupingBy(ImmutablePair::getLeft)) .values() .stream() + .filter(Objects::nonNull) .map(this::mergeInstances); } From 5e17247bb658de3d7b5fb0db39d3337cbeaa6fd1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 13 Dec 2021 11:48:40 +0100 Subject: [PATCH 085/176] avoid NPEs merging XMLInstance(s) --- .../oa/provision/utils/XmlRecordFactory.java | 89 +++++++++---------- 1 file changed, 40 insertions(+), 49 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 66827af67..8bf2a4185 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1,50 +1,6 @@ package eu.dnetlib.dhp.oa.provision.utils; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor; -import static org.apache.commons.lang3.StringUtils.isNotBlank; -import static org.apache.commons.lang3.StringUtils.substringBefore; - -import java.io.IOException; -import java.io.Serializable; -import java.io.StringReader; -import java.io.StringWriter; -import java.net.URL; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; -import java.util.function.Function; -import java.util.stream.Collector; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerConfigurationException; -import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.tuple.ImmutablePair; -import org.apache.commons.lang3.tuple.Pair; -import org.apache.solr.common.util.URLUtil; -import org.apache.spark.util.LongAccumulator; -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.Element; -import org.dom4j.Node; -import org.dom4j.io.OutputFormat; -import org.dom4j.io.SAXReader; -import org.dom4j.io.XMLWriter; - import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Joiner; import com.google.common.base.Splitter; @@ -53,14 +9,42 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.mycila.xmltool.XMLDoc; import com.mycila.xmltool.XMLTag; - import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.model.XmlInstance; import eu.dnetlib.dhp.schema.common.*; +import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.spark.util.LongAccumulator; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.Element; +import org.dom4j.Node; +import org.dom4j.io.OutputFormat; +import org.dom4j.io.SAXReader; +import org.dom4j.io.XMLWriter; + +import javax.xml.transform.*; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import java.io.IOException; +import java.io.Serializable; +import java.io.StringReader; +import java.io.StringWriter; +import java.net.URL; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor; +import static org.apache.commons.lang3.StringUtils.isNotBlank; +import static org.apache.commons.lang3.StringUtils.substringBefore; public class XmlRecordFactory implements Serializable { @@ -1274,6 +1258,7 @@ public class XmlRecordFactory implements Serializable { private Stream groupInstancesByUrl(List instance) { return instance .stream() + .filter(i -> Objects.nonNull(i.getUrl())) .map(i -> { i .setUrl( @@ -1329,18 +1314,24 @@ public class XmlRecordFactory implements Serializable { instance.getCollectedfrom().add(i.getCollectedfrom()); instance.getHostedby().add(i.getHostedby()); instance.getInstancetype().add(i.getInstancetype()); - instance.getLicense().add(i.getLicense().getValue()); - instance.getDistributionlocation().add(i.getDistributionlocation()); instance.getPid().addAll(i.getPid()); instance.getAlternateIdentifier().addAll(i.getAlternateIdentifier()); - instance.getDateofacceptance().add(i.getDateofacceptance().getValue()); + instance.getRefereed().add(i.getRefereed()); instance .setProcessingchargeamount( Optional.ofNullable(i.getProcessingchargeamount()).map(apc -> apc.getValue()).orElse(null)); instance .setProcessingchargecurrency( Optional.ofNullable(i.getProcessingchargecurrency()).map(c -> c.getValue()).orElse(null)); - instance.getRefereed().add(i.getRefereed()); + Optional + .ofNullable(i.getDateofacceptance()) + .ifPresent(d -> instance.getDateofacceptance().add(d.getValue())); + Optional + .ofNullable(i.getLicense()) + .ifPresent(license -> instance.getLicense().add(license.getValue())); + Optional + .ofNullable(i.getDistributionlocation()) + .ifPresent(dl -> instance.getDistributionlocation().add(dl)); }); if (instance.getHostedby().size() > 1 From 98eb292c5971e90cbd67d7e833f720879d9dff5e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 13 Dec 2021 13:27:20 +0100 Subject: [PATCH 086/176] avoid NPEs merging XMLInstance(s) --- .../oa/provision/utils/XmlRecordFactory.java | 73 ++++++++++--------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 8bf2a4185..af7c7b7c3 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1,22 +1,24 @@ package eu.dnetlib.dhp.oa.provision.utils; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Joiner; -import com.google.common.base.Splitter; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; -import com.mycila.xmltool.XMLDoc; -import com.mycila.xmltool.XMLTag; -import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; -import eu.dnetlib.dhp.oa.provision.model.XmlInstance; -import eu.dnetlib.dhp.schema.common.*; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor; +import static org.apache.commons.lang3.StringUtils.isNotBlank; +import static org.apache.commons.lang3.StringUtils.substringBefore; + +import java.io.IOException; +import java.io.Serializable; +import java.io.StringReader; +import java.io.StringWriter; +import java.net.URL; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import javax.xml.transform.*; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; + import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; @@ -29,22 +31,23 @@ import org.dom4j.io.OutputFormat; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; -import javax.xml.transform.*; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; -import java.io.IOException; -import java.io.Serializable; -import java.io.StringReader; -import java.io.StringWriter; -import java.net.URL; -import java.util.*; -import java.util.stream.Collectors; -import java.util.stream.Stream; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Joiner; +import com.google.common.base.Splitter; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.mycila.xmltool.XMLDoc; +import com.mycila.xmltool.XMLTag; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor; -import static org.apache.commons.lang3.StringUtils.isNotBlank; -import static org.apache.commons.lang3.StringUtils.substringBefore; +import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; +import eu.dnetlib.dhp.oa.provision.model.XmlInstance; +import eu.dnetlib.dhp.schema.common.*; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { @@ -1314,8 +1317,6 @@ public class XmlRecordFactory implements Serializable { instance.getCollectedfrom().add(i.getCollectedfrom()); instance.getHostedby().add(i.getHostedby()); instance.getInstancetype().add(i.getInstancetype()); - instance.getPid().addAll(i.getPid()); - instance.getAlternateIdentifier().addAll(i.getAlternateIdentifier()); instance.getRefereed().add(i.getRefereed()); instance .setProcessingchargeamount( @@ -1323,6 +1324,12 @@ public class XmlRecordFactory implements Serializable { instance .setProcessingchargecurrency( Optional.ofNullable(i.getProcessingchargecurrency()).map(c -> c.getValue()).orElse(null)); + Optional + .ofNullable(i.getPid()) + .ifPresent(pid -> instance.getPid().addAll(pid)); + Optional + .ofNullable(i.getAlternateIdentifier()) + .ifPresent(altId -> instance.getAlternateIdentifier().addAll(altId)); Optional .ofNullable(i.getDateofacceptance()) .ifPresent(d -> instance.getDateofacceptance().add(d.getValue())); From 8d755cca808062f2f55e70c1c927eca7c84131cb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 13 Dec 2021 15:01:40 +0100 Subject: [PATCH 087/176] - --- .../dump/complete/CreateContextEntities.java | 2 +- .../dump/complete/CreateContextRelation.java | 2 +- .../dump/complete/SparkCollectAndSave.java | 2 +- .../dump/complete/SparkDumpEntitiesJob.java | 2 +- .../dump/complete/SparkDumpRelationJob.java | 9 +- .../complete/SparkOrganizationRelation.java | 2 +- .../SparkSelectValidRelationsJob.java | 2 +- .../dhp/oa/graph/dump/DumpJobTest.java | 49 +++++++-- .../graph/dump/complete/DumpRelationTest.java | 100 ++++++++---------- 9 files changed, 94 insertions(+), 76 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java index aa031203d..c422c3b40 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java @@ -41,7 +41,7 @@ public class CreateContextEntities implements Serializable { .toString( CreateContextEntities.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java index 3c71fcaa5..03f53de64 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java @@ -48,7 +48,7 @@ public class CreateContextRelation implements Serializable { .requireNonNull( CreateContextRelation.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json"))); + "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java index e902a02c8..1c98ea6cb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java @@ -31,7 +31,7 @@ public class SparkCollectAndSave implements Serializable { .toString( SparkCollectAndSave.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java index f239ffca7..7cc05a821 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java @@ -22,7 +22,7 @@ public class SparkDumpEntitiesJob implements Serializable { .toString( SparkDumpEntitiesJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/wf/input_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java index 05e3f8835..0ac97dcb6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java @@ -38,7 +38,7 @@ public class SparkDumpRelationJob implements Serializable { .toString( SparkDumpRelationJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -55,13 +55,12 @@ public class SparkDumpRelationJob implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - Optional rs = Optional.ofNullable(parser.get("removeSet")); + Optional rs = Optional.ofNullable(parser.get("removeSet")); final Set removeSet = new HashSet<>(); - if(rs.isPresent()){ + if (rs.isPresent()) { Collections.addAll(removeSet, rs.get().split(";")); } - SparkConf conf = new SparkConf(); runWithSparkSession( @@ -78,7 +77,7 @@ public class SparkDumpRelationJob implements Serializable { private static void dumpRelation(SparkSession spark, String inputPath, String outputPath, Set removeSet) { Dataset relations = Utils.readPath(spark, inputPath, Relation.class); relations - .filter((FilterFunction)r -> !removeSet.contains(r.getRelClass())) + .filter((FilterFunction) r -> !removeSet.contains(r.getRelClass())) .map((MapFunction) relation -> { eu.dnetlib.dhp.schema.dump.oaf.graph.Relation relNew = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation(); relNew diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java index 4475232de..dff1a1066 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java @@ -39,7 +39,7 @@ public class SparkOrganizationRelation implements Serializable { .toString( SparkOrganizationRelation.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java index d1bca6a7c..1e5675e5f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java @@ -35,7 +35,7 @@ public class SparkSelectValidRelationsJob implements Serializable { .toString( SparkSelectValidRelationsJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 6dc56c846..30e2f93ef 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -917,7 +917,7 @@ public class DumpJobTest { DumpProducts dump = new DumpProducts(); dump .run( - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -949,18 +949,45 @@ public class DumpJobTest { Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1); temp = spark - .sql( - "select id, inst.articleprocessingcharge.amount, inst.articleprocessingcharge.currency " + - "from check " + - "lateral view explode (instance) i as inst " + - "where inst.articleprocessingcharge is not null"); + .sql( + "select id, inst.articleprocessingcharge.amount, inst.articleprocessingcharge.currency " + + "from check " + + "lateral view explode (instance) i as inst " + + "where inst.articleprocessingcharge is not null"); + Assertions + .assertEquals( + "3131.64", + temp + .filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'") + .collectAsList() + .get(0) + .getString(1)); + Assertions + .assertEquals( + "EUR", + temp + .filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'") + .collectAsList() + .get(0) + .getString(2)); - Assertions.assertEquals("3131.64", temp.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'").collectAsList().get(0).getString(1)); - Assertions.assertEquals("EUR", temp.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'").collectAsList().get(0).getString(2)); - - Assertions.assertEquals("2578.35", temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").collectAsList().get(0).getString(1)); - Assertions.assertEquals("EUR", temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").collectAsList().get(0).getString(2)); + Assertions + .assertEquals( + "2578.35", + temp + .filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'") + .collectAsList() + .get(0) + .getString(1)); + Assertions + .assertEquals( + "EUR", + temp + .filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'") + .collectAsList() + .get(0) + .getString(2)); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java index 8a2eb585e..763318ae4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java @@ -4,10 +4,8 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; - import java.util.HashMap; - import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -83,7 +81,6 @@ public class DumpRelationTest { "-sourcePath", sourcePath }); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc @@ -145,7 +142,6 @@ public class DumpRelationTest { "-sourcePath", sourcePath }); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc @@ -207,105 +203,101 @@ public class DumpRelationTest { @Test public void test3() throws Exception {// final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") + .getPath(); SparkDumpRelationJob.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/relation", - "-sourcePath", sourcePath, - "-removeSet", "isParticipant" + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", sourcePath, + "-removeSet", "isParticipant" }); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); verificationDataset.createOrReplaceTempView("table"); verificationDataset - .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); Dataset check = spark - .sql( - "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " - + - "from table "); + .sql( + "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " + + + "from table "); Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count()); Assertions - .assertEquals( - 22, check - .filter( - "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + - "provenance = 'Harvested'") - .count()); + .assertEquals( + 22, check + .filter( + "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + + "provenance = 'Harvested'") + .count()); Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count()); - Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count()); Assertions - .assertEquals( - 1, check - .filter( - "name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " + - "and provenance = 'Inferred by OpenAIRE'") - .count()); + .assertEquals( + 1, check + .filter( + "name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " + + "and provenance = 'Inferred by OpenAIRE'") + .count()); } @Test public void test4() throws Exception {// final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") + .getPath(); SparkDumpRelationJob.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/relation", - "-sourcePath", sourcePath, - "-removeSet", "isParticipant;isAuthorInstitutionOf" + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", sourcePath, + "-removeSet", "isParticipant;isAuthorInstitutionOf" }); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); verificationDataset.createOrReplaceTempView("table"); verificationDataset - .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); Dataset check = spark - .sql( - "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " - + - "from table "); + .sql( + "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " + + + "from table "); Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count()); Assertions - .assertEquals( - 22, check - .filter( - "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + - "provenance = 'Harvested'") - .count()); + .assertEquals( + 22, check + .filter( + "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + + "provenance = 'Harvested'") + .count()); Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count()); - Assertions.assertEquals(0, check.filter("name = 'isAuthorInstitutionOf'").count()); } From 4eb8276493b8f936ecc45bf5316822fce277a4c8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 14 Dec 2021 11:12:17 +0100 Subject: [PATCH 088/176] - --- .../actionmanager/bipfinder/Constants.java | 49 ++++ .../bipfinder/PreparedResult.java | 28 --- .../bipfinder/SparkAtomicActionScoreJob.java | 64 +++--- .../bipfinder/oozie_app/config-default.xml | 30 +++ .../bipfinder/oozie_app/workflow.xml | 34 +++ .../SparkAtomicActionScoreJobTest.java | 210 ++---------------- .../bipfinder/bip_scores_oid.json | 4 + .../bipfinder/publicationoaid.json | 18 ++ 8 files changed, 185 insertions(+), 252 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Constants.java delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/PreparedResult.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores_oid.json create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publicationoaid.json diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Constants.java new file mode 100644 index 000000000..0e08fd06c --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Constants.java @@ -0,0 +1,49 @@ + +package eu.dnetlib.dhp.actionmanager.bipfinder; + +import java.util.Optional; + +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class Constants { + + public static final String DOI = "doi"; + + public static final String UPDATE_DATA_INFO_TYPE = "update"; + public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos"; + public static final String UPDATE_CLASS_NAME = "Inferred by OpenAIRE"; + public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip"; + + public static final String FOS_CLASS_ID = "FOS"; + public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification"; + + public static final String NULL = "NULL"; + + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private Constants() { + } + + public static Boolean isSparkSessionManaged(ArgumentApplicationParser parser) { + return Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + } + + public static Dataset readPath( + SparkSession spark, String inputPath, Class clazz) { + return spark + .read() + .textFile(inputPath) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/PreparedResult.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/PreparedResult.java deleted file mode 100644 index 493e94417..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/PreparedResult.java +++ /dev/null @@ -1,28 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.bipfinder; - -import java.io.Serializable; - -/** - * Subset of the information of the generic results that are needed to create the atomic action - */ -public class PreparedResult implements Serializable { - private String id; // openaire id - private String value; // doi - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index f178451c1..d4dce6ae4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; +import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; @@ -18,6 +19,7 @@ import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -27,8 +29,10 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; /** @@ -105,32 +109,29 @@ public class SparkAtomicActionScoreJob implements Serializable { results.createOrReplaceTempView("result"); - Dataset preparedResult = spark + Dataset preparedResult = spark .sql( - "select pIde.value value, id " + + "select id " + "from result " + - "lateral view explode (pid) p as pIde " + - "where dataInfo.deletedbyinference = false and pIde.qualifier.classid = '" + DOI + "'") - .as(Encoders.bean(PreparedResult.class)); + + "where dataInfo.deletedbyinference = false "); bipScores .joinWith( - preparedResult, bipScores.col("id").equalTo(preparedResult.col("value")), + preparedResult, bipScores.col("id").equalTo(preparedResult.col("id")), "inner") - .map((MapFunction, BipScore>) value -> { + .map((MapFunction, BipScore>) value -> { BipScore ret = value._1(); - ret.setId(value._2().getId()); + ret.setId(value._1().getId()); return ret; }, Encoders.bean(BipScore.class)) - .groupByKey((MapFunction) BipScore::getId, Encoders.STRING()) - .mapGroups((MapGroupsFunction) (k, it) -> { - Result ret = new Result(); - ret.setDataInfo(getDataInfo()); - BipScore first = it.next(); - ret.setId(first.getId()); - ret.setMeasures(getMeasure(first)); - it.forEachRemaining(value -> ret.getMeasures().addAll(getMeasure(value))); + .map((MapFunction) bs -> { + Result ret = new Result(); + + ret.setId(bs.getId()); + + ret.setMeasures(getMeasure(bs)); return ret; }, Encoders.bean(Result.class)) @@ -159,7 +160,21 @@ public class SparkAtomicActionScoreJob implements Serializable { KeyValue kv = new KeyValue(); kv.setValue(unit.getValue()); kv.setKey(unit.getKey()); - kv.setDataInfo(getDataInfo()); + kv + .setDataInfo( + OafMapperUtils + .dataInfo( + false, + UPDATE_DATA_INFO_TYPE, + true, + false, + OafMapperUtils + .qualifier( + UPDATE_MEASURE_BIP_CLASS_ID, + UPDATE_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + "")); return kv; }) .collect(Collectors.toList())); @@ -168,21 +183,6 @@ public class SparkAtomicActionScoreJob implements Serializable { .collect(Collectors.toList()); } - private static DataInfo getDataInfo() { - DataInfo di = new DataInfo(); - di.setInferred(false); - di.setInvisible(false); - di.setDeletedbyinference(false); - di.setTrust(""); - Qualifier qualifier = new Qualifier(); - qualifier.setClassid("sysimport:actionset"); - qualifier.setClassname("Harvested"); - qualifier.setSchemename("dnet:provenanceActions"); - qualifier.setSchemeid("dnet:provenanceActions"); - di.setProvenanceaction(qualifier); - return di; - } - private static void removeOutputDir(SparkSession spark, String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/config-default.xml new file mode 100644 index 000000000..d262cb6e0 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml index 5311a6614..eef429087 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml @@ -13,6 +13,40 @@ outputPath the path where to store the actionset + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java index f2158748b..f040ee836 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java @@ -28,6 +28,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; public class SparkAtomicActionScoreJobTest { @@ -69,13 +70,13 @@ public class SparkAtomicActionScoreJobTest { } @Test - void matchOne() throws Exception { + void testMatch() throws Exception { String bipScoresPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") + .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores_oid.json") .getPath(); String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json") + "/eu/dnetlib/dhp/actionmanager/bipfinder/publicationoaid.json") .getPath(); SparkAtomicActionScoreJob @@ -95,223 +96,48 @@ public class SparkAtomicActionScoreJobTest { final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); - JavaRDD tmp = sc + JavaRDD tmp = sc .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) - .map(aa -> ((Publication) aa.getPayload())); + .map(aa -> ((Result) aa.getPayload())); - assertEquals(1, tmp.count()); + assertEquals(4, tmp.count()); - Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); - verificationDataset.createOrReplaceTempView("publication"); + Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Result.class)); + verificationDataset.createOrReplaceTempView("result"); Dataset execVerification = spark .sql( - "Select p.id oaid, mes.id, mUnit.value from publication p " + + "Select p.id oaid, mes.id, mUnit.value from result p " + "lateral view explode(measures) m as mes " + "lateral view explode(mes.unit) u as mUnit "); - Assertions.assertEquals(2, execVerification.count()); - + Assertions.assertEquals(12, execVerification.count()); Assertions .assertEquals( - "50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb", - execVerification.select("oaid").collectAsList().get(0).getString(0)); - - Assertions - .assertEquals( - "1.47565045883e-08", - execVerification.filter("id = 'influence'").select("value").collectAsList().get(0).getString(0)); - - Assertions - .assertEquals( - "0.227515392", - execVerification.filter("id = 'popularity'").select("value").collectAsList().get(0).getString(0)); - - } - - @Test - void matchOneWithTwo() throws Exception { - String bipScoresPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") - .getPath(); - String inputPath = getClass() - .getResource( - "/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json") - .getPath(); - - SparkAtomicActionScoreJob - .main( - new String[] { - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-inputPath", - inputPath, - "-bipScorePath", - bipScoresPath, - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Publication", - "-outputPath", - workingDir.toString() + "/actionSet" - }); - - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) - .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) - .map(aa -> ((Publication) aa.getPayload())); - - assertEquals(1, tmp.count()); - - Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); - verificationDataset.createOrReplaceTempView("publication"); - - Dataset execVerification = spark - .sql( - "Select p.id oaid, mes.id, mUnit.value from publication p " + - "lateral view explode(measures) m as mes " + - "lateral view explode(mes.unit) u as mUnit "); - - Assertions.assertEquals(4, execVerification.count()); - - Assertions - .assertEquals( - "50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb", - execVerification.select("oaid").collectAsList().get(0).getString(0)); - - Assertions - .assertEquals( - 2, - execVerification.filter("id = 'influence'").count()); - - Assertions - .assertEquals( - 2, - execVerification.filter("id = 'popularity'").count()); - - List tmp_ds = execVerification.filter("id = 'influence'").select("value").collectAsList(); - String tmp_influence = tmp_ds.get(0).getString(0); - assertTrue( - "1.47565045883e-08".equals(tmp_influence) || - "1.98956540239e-08".equals(tmp_influence)); - - tmp_influence = tmp_ds.get(1).getString(0); - assertTrue( - "1.47565045883e-08".equals(tmp_influence) || - "1.98956540239e-08".equals(tmp_influence)); - - assertNotEquals(tmp_ds.get(1).getString(0), tmp_ds.get(0).getString(0)); - - } - - @Test - void matchTwo() throws Exception { - String bipScoresPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") - .getPath(); - String inputPath = getClass() - .getResource( - "/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json") - .getPath(); - - SparkAtomicActionScoreJob - .main( - new String[] { - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-inputPath", - inputPath, - "-bipScorePath", - bipScoresPath, - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Publication", - "-outputPath", - workingDir.toString() + "/actionSet" - }); - - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) - .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) - .map(aa -> ((Publication) aa.getPayload())); - - assertEquals(2, tmp.count()); - - Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); - verificationDataset.createOrReplaceTempView("publication"); - - Dataset execVerification = spark - .sql( - "Select p.id oaid, mes.id, mUnit.value from publication p " + - "lateral view explode(measures) m as mes " + - "lateral view explode(mes.unit) u as mUnit "); - - Assertions.assertEquals(4, execVerification.count()); - - Assertions - .assertEquals( - 2, - execVerification.filter("oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb'").count()); - - Assertions - .assertEquals( - 2, - execVerification.filter("oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09'").count()); - - Assertions - .assertEquals( - 2, - execVerification.filter("id = 'influence'").count()); - - Assertions - .assertEquals( - 2, - execVerification.filter("id = 'popularity'").count()); - - Assertions - .assertEquals( - "1.47565045883e-08", - execVerification + "6.63451994567e-09", execVerification .filter( - "oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb' " + + "oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " + "and id = 'influence'") .select("value") .collectAsList() .get(0) .getString(0)); - Assertions .assertEquals( - "1.98956540239e-08", - execVerification + "0.348694533145", execVerification .filter( - "oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09' " + - "and id = 'influence'") + "oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " + + "and id = 'popularity_alt'") .select("value") .collectAsList() .get(0) .getString(0)); - Assertions .assertEquals( - "0.282046161584", - execVerification + "2.16094680115e-09", execVerification .filter( - "oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09' " + - "and id = 'popularity'") - .select("value") - .collectAsList() - .get(0) - .getString(0)); - - Assertions - .assertEquals( - "0.227515392", - execVerification - .filter( - "oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb' " + + "oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " + "and id = 'popularity'") .select("value") .collectAsList() diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores_oid.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores_oid.json new file mode 100644 index 000000000..5dcf486d0 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores_oid.json @@ -0,0 +1,4 @@ +{"50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1": [{"id": "influence", "unit": [{"value": "6.63451994567e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.348694533145", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.16094680115e-09", "key": "score"}]}]} +{"50|dedup_wf_001::05b1f8ce98702f69d07aa5f0429de1e3": [{"id": "influence", "unit": [{"value": "6.25057357279e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "7.0208", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.40234462244e-08", "key": "score"}]}]} +{"50|dedup_wf_001::08823c8f5c3ca2eae523817036cdda67": [{"id": "influence", "unit": [{"value": "5.54921449123e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.53012887452e-10", "key": "score"}]}]} +{"50|dedup_wf_001::0e72b399325d6efcbe3271891a1dfe4c": [{"id": "influence", "unit": [{"value": "1.63466096315e-08", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "20.9870879741", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.49501495323e-08", "key": "score"}]}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publicationoaid.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publicationoaid.json new file mode 100644 index 000000000..6a4506816 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publicationoaid.json @@ -0,0 +1,18 @@ +{"author":[{"fullname":"Niskala, Eino","name":"Eino","pid":[],"rank":1,"surname":"Niskala"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.375Z","dateoftransformation":"2020-10-14T13:55:20.918Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719957520,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-30T11:16:46Z","harvestDate":"2020-10-14T13:09:13.375Z","identifier":"oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Niskala , E 1988 , ' Puutalon ulkovaipan korjaus ' , Kodinrakentaja , no. 3 , pp. 57-60 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puutalon ulkovaipan korjaus"}]} +{"author":[{"fullname":"Ikonen, Kari","name":"Kari","pid":[],"rank":1,"surname":"Ikonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"dateofcollection":"2020-10-14T13:13:18.619Z","dateoftransformation":"2020-10-14T14:38:03.661Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::05b1f8ce98702f69d07aa5f0429de1e3","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/aad07a47-f575-4696-9323-826722e44745"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720067542,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-22T07:59:56Z","harvestDate":"2020-10-14T13:13:18.619Z","identifier":"oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ikonen , K 1986 , ' Rakenneanalyysiohjelmat ' , Paper presented at Ydinvoimalaitosten turvallisuustutkimuksessa Suomessa käytetyt tietokoneohjelmat , Lappeenranta , Finland , 1/01/86 - 31/05/86 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/v1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"V1 Non-published/full refereed conference article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Rakenneanalyysiohjelmat"}]} +{"author":[{"fullname":"Home, Silja","name":"Silja","pid":[],"rank":1,"surname":"Home"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"dateofcollection":"2020-10-14T13:09:44.334Z","dateoftransformation":"2020-10-14T14:40:24.929Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::08823c8f5c3ca2eae523817036cdda67","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720074531,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-24T10:28:45Z","harvestDate":"2020-10-14T13:09:44.334Z","identifier":"oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Home , S 1993 , ' Oluen kemiaa ' , Dimensio , vol. 57 , no. 5 , pp. 10-15 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Oluen kemiaa"}]} +{"author":[{"fullname":"Mattila, Sakari","name":"Sakari","pid":[],"rank":1,"surname":"Mattila"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2020-10-14T13:09:40.962Z","dateoftransformation":"2020-10-14T14:46:53.279Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::0e72b399325d6efcbe3271891a1dfe4c","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720088014,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-27T09:00:30Z","harvestDate":"2020-10-14T13:09:40.962Z","identifier":"oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mattila , S 1991 , ' Puoliksi avointa ' , Tietotekniikka , vol. 37 , no. 5 , 21 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puoliksi avointa"}]} +{"author":[{"fullname":"Viitaniemi, Pertti","name":"Pertti","pid":[],"rank":1,"surname":"Viitaniemi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.348Z","dateoftransformation":"2020-10-14T16:58:47.202Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f1fa9e5d1ba36533cb0afb54538a6b09","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719769155,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-22T10:46:50Z","harvestDate":"2020-10-14T13:09:13.348Z","identifier":"oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Viitaniemi , P 1988 , ' Puun kierteisyys ja sen vaikutus sahatavaran laatuun ' , Sahamies , no. 9 , pp. 260-264 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puun kierteisyys ja sen vaikutus sahatavaran laatuun"}]} +{"author":[{"fullname":"Varjonen, Suvi","name":"Suvi","pid":[],"rank":1,"surname":"Varjonen"},{"fullname":"Laaksonen, Päivi","name":"Päivi","pid":[],"rank":2,"surname":"Laaksonen"},{"fullname":"Paananen, Arja","name":"Arja","pid":[],"rank":3,"surname":"Paananen"},{"fullname":"Valo, Hanna","name":"Hanna","pid":[],"rank":4,"surname":"Valo"},{"fullname":"Hähl, Hendrik","name":"Hendrik","pid":[],"rank":5,"surname":"Hähl"},{"fullname":"Laaksonen, Timo","name":"Timo","pid":[],"rank":6,"surname":"Laaksonen"},{"fullname":"Linder, Markus","name":"Markus","pid":[],"rank":7,"surname":"Linder"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"dateofcollection":"2020-10-14T13:00:59.594Z","dateoftransformation":"2020-10-14T17:14:32.702Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"One central problem for the function and manufacture of materials where performance relies on nanoscale structure is to control the compatibility and interactions of the building blocks. In natural materials, such as nacre, there are examples of multifunctional macromolecules that have combined binding affinities for different materials within the same molecule, thereby bridging these materials and acting as a molecular glue. Here, we describe the use of a designed multifunctional protein that is used for self-assembly of nanofibrillar cellulose. Recent advances in the production of cellulose nanofibrils have given inspiration for new uses of cellulosic materials. Cellulose nanofibrils have mechanical and structural features that open new possibilities for performance in composites and other nanoscale materials. Functionalisation was realised through a bi-functional fusion protein having both an ability to bind to cellulose and a second functionality of surface activity. The cellulose-binding function was obtained using cellulose-binding domains from cellulolytic enzymes and the surface activity through the use of a surface active protein called hydrophobin. Using the bi-functional protein, cellulose nanofibrils could be assembled into tightly packed thin films at the air/water interface and at the oil/water interface. It was shown that the combination of protein and cellulose nanofibrils resulted in a synergistic improvement in the formation and stability of oil-in-water emulsions resulting in emulsions that were stable for several months. The bi-functionality of the protein also allowed the binding of hydrophobic solid drug nanoparticles to cellulose nanofibrils and thereby improving their long-term stability under physiological conditions."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5521b424-20a0-4f8c-8c70-505af50c5fef","https://doi.org/10.1039/C0SM01114B"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719787721,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-24T01:09:04Z","harvestDate":"2020-10-14T13:00:59.594Z","identifier":"oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.buildenv.2010.01.006"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Varjonen , S , Laaksonen , P , Paananen , A , Valo , H , Hähl , H , Laaksonen , T & Linder , M 2011 , ' Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins ' , Soft Matter , vol. 7 , no. 6 , pp. 2402-2411 . https://doi.org/10.1039/C0SM01114B"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/a1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"A1 Refereed journal article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins"}]} +{"author":[{"fullname":"Macharia, Bodia","name":"Bodia","pid":[],"rank":1,"surname":"Macharia"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Crosta, Suzanne"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"French"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"dateofcollection":"2020-09-30T05:06:26.491Z","dateoftransformation":"2020-10-09T05:07:41.329Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

Ce qui nous frappe en premier lieu dans le roman de Melchior Mbonimpa, Le Totem des Baranda, c'est l'efficacité de sa fonction didactique. Le livre est porteur d'un message, d'une vision sur l'histoire de l'Afrique des Grands Lacs qu'on rencontre rarement dans l'actualité ou l'histoire immédiate que nous livrent les médias.

Cette thèse se penchera sur un aspect de ce roman qui a particulièrement attiré notre attention: la représentation de la résistance à travers l'affirmation identitaire des personnages féminins. Dans notre analyse de ce thème, nous accordons une grande importance au contexte socioculturel et à l'imaginaire collectif qui nourrissent l'auteur et dans lesquels s'inscrivent ses personnages. L'étude soulignera la fonction sociale de l'écrivain et relèvera la contribution de la fiction à la culture africaine. Nous mettrons en évidence les positions idéologiques de l'auteur et le message de sensibilisation qu'il adresse à diverses catégories de lecteurs: ceux qui se trouvent en Afrique, les Africains de la diaspora, et les lecteurs non-Africains qui s'intéressent à son œuvre. Cette orientation idéologique affecte évidemment la manière dont s'exprime la résistance des pnncIpaux personnages féminins du roman. Elle détermine la place que cette fiction accorde à la femme, et qui correspond sans doute à la place que, selon les souhaits de l'auteur, la femme devrait occuper dans le contexte culturel africain. Notre étude insiste aussi sur le fait que dans ce roman, la modernité se nourrit de la culture traditionnelle et ne renie pas les valeurs du passé.

Ainsi, le parcours narratif montre que dans leur résistance, les héroïnes de la généalogie vivent à leur époque, mais chacune porte plus loin une mission transmise par les précédentes.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::4ebf4c756904fa8a37615e44b1200332","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/10605"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720429600,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:27Z","harvestDate":"2020-09-30T05:06:26.491Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/10605","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/10605"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Pour une renaissance généalogique: résistance féminine chez Melchior Mbonimpa"}]} +{"author":[{"fullname":"Krause, Walter Thomas","name":"Walter Thomas","pid":[],"rank":1,"surname":"Krause"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Datars, W.R."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Physics"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"dateofcollection":"2020-09-30T05:06:17.843Z","dateoftransformation":"2020-10-09T05:07:43.404Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

The superconducting transition in the magnetic resistance of members of the family of bismuth based high temperature superconductors was investigated. Measurements were performed in magnetic fields up to 1.7 T. Small current densities ranging from 0.03 A/cm² to 3.0 A/cm² were applied. The resistivity of Bi₂Sr₂CaCu₂Ox single crystals was analyzed in terms of the thermally activated flux flow expression, ρ =ρ₀U/T exp(-U/T) where T is the temperature. It was found that the activation energy was given by Uα(Hsinθ)^(⁻α) where α≈1/3 and that the prefactor had the form, ρ₀Hsinθ, where H was the applied field and θ the angle of the field with respect to the CuO₂ planes. Results demonstrated that dissipation could be accounted for by the motion of two-dimensional vortices whose density is given by the field, Hsinθ, projected on the CuO₂ planes. Measurements of the resistivity and current dependent resistivity were performed with two Sn-doped and two Sb-doped polycrystalline Bi(1.7)Pb(0.3)Sr₂Ca₂Cu₃O(y) samples. Features in the temperature derivative of the resistivity curves were associated with the presence of a superconducting transition between superconducting grains, coupled by weak links with a distribution of critical currents and critical temperatures, and the superconducting transition within grains. The transition between grains was more strongly suppressed in temperature with the application of a magnetic field in samples with weaker coupling between grains. The presence of a transition in a magnetic field due to weak links between grains was verified at 77 K by the observation of a current dependent resistivity in a magnetic field. Measurements of a Bi₂Sr₂CaCu₂Ox diffusion grown thick film ring were done. The transverse voltage, the voltage at the centre of a 120 μm thick branch with respect to the centre of a 76 μm thick branch, was measured. A higher critical temperature from the presence of more texturing in the 76 μm branch as determined by separate resistivity, x-ray and scanning electron microscopy measurements was consistent with the measurement of a crossover from a negative to positive transverse voltage as the temperature of the sample went through its superconducting transition.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Doctor of Philosophy (PhD)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::57e9c0e7f2803e74fef30e18bab5e450","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/8621"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720431464,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:31:35Z","harvestDate":"2020-09-30T05:06:17.843Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/8621","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/8621"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Magnetic field resistivity of superconducting bismuth oxides"}]} +{"author":[{"fullname":"Sharp, Jeremy","name":"Jeremy","pid":[],"rank":1,"surname":"Sharp"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Savage, A."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"English"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"dateofcollection":"2020-09-30T05:06:27.768Z","dateoftransformation":"2020-10-09T05:07:56.297Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

This study examines Leonard Cohen's novel Beautiful Losers through the lenses of allegorical and authorial theories to appreciate how the novel uses allegorical techniques to code into symbolic terms an exploration of the polysemous nature of the word ''translation.'' The first chapter studies the stylistic and conceptual dimensions of allegory as a literary genre - as critics like Northrop Frye, Angus Fletchet, and Maureen Quilligan help to define it - while arguing that Cohen's novel is consciously allegorical, challenging readers to interpret what it \"means,\" or may mean. The second chapter performs an intensive re-reading of Beautiful Losers, examining how the novel uses complex systems of verbal play (particularly puns) to coordinate a reunification of various dichotomies historical \"reality\"/imaginative myth, secularity/spirituality, enslavement/sanctification, among others - employed throughout the text. The thesis concludes that the novel is perpetually playing with various types of translation (spiritual, linguistic, physical, and so forth), affirming the need for emotionally-charged, devotional forms of expression (like song and prayer) over more clinical attempts to reorder or recreate the world and its inhabitants. Ultimately, this discussion argues that an understanding of the allegorical dimensions of Beautiful Losers may illuminate how Cohen's other works (particularly his songs) may be studied as attempts to associate word with voice, to emphasize the process of expression (translation) rather than just the finished product.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::8ab8cb6d096b31eb67b4aaf43ca2d75f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/11059"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720442126,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:51Z","harvestDate":"2020-09-30T05:06:27.768Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/11059","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/11059"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"''What'd I Say?\": Beautiful Losers' Allegory of Translation"}]} +{"author":[{"fullname":"Hetemäki, Ilari","name":"Ilari","pid":[],"rank":1,"surname":"Hetemäki"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:44.425Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/1850"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"3","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":"27"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720425037,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-08-27T12:42:47.579Z","identifier":"oai:journal.fi:article/1850","metadataNamespace":""}},"originalId":["oai:journal.fi:article/1850"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Lyhyesti"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Lyhyesti"}]} +{"author":[{"fullname":"Kulonen[-Korhonen], Ulla[-Maija]","pid":[],"rank":1}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:51.844Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvio Abondolo, Daniel Mario: Hungarian inflectional morphology Kielenainekset etuprosodinen (kieli: suomi, sivulla: 254) juuri (kieli: suomi, sivulla: 254) koodi (kieli: suomi, sivulla: 254) subjektikonjugaatio (kieli: suomi, sivulla: 255) takaprosodinen (kieli: suomi, sivulla: 254)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::4c60c95783c4b240747e52990e709573","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0015","classname":"Review","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/virittaja/article/view/38371"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"253","iss":"2","issnLinking":"","issnOnline":"2242-8828","issnPrinted":"0042-6806","name":"Virittäjä","sp":"253","vol":"94"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720430784,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:05Z","harvestDate":"2020-08-27T14:00:01.261Z","identifier":"oai:journal.fi:article/38371","metadataNamespace":""}},"originalId":["oai:journal.fi:article/38371"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kotikielen Seura"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Virittäjä"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuutta"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Tuore näkemys unkarin taivutusmorfologiasta"}]} +{"author":[{"fullname":"Kerppola-Pesu, Jenni","name":"Jenni","pid":[],"rank":1,"surname":"Kerppola-Pesu"},{"fullname":"Halme, Nina","name":"Nina","pid":[],"rank":2,"surname":"Halme"},{"fullname":"Pietilä, Anna-Maija","name":"Anna-Maija","pid":[],"rank":3,"surname":"Pietilä"},{"fullname":"Perälä, Marja-Leena","name":"Marja-Leena","pid":[],"rank":4,"surname":"Perälä"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:55.86Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vanhempien osallisuuden vahvistaminen sekä oikeus tulla kuulluksi ovat keskeisiä lasten palveluja ohjaavia periaatteita. Osallisuuden toteutumisessa on kuitenkin edelleen puutteita. Tämän tutkimuksen tarkoituksena oli selvittää päihdepalvelujen esimiesten käsityksiä siitä, miten päihdepalvelujen piirissä olevien vanhempien osallisuutta tuetaan. Osallisuuden tukemista arvioitiin työntekijöille modifiodulla Family Empowerment Scale -mittarilla (FES). Aineisto kerättiin päihdepalveluissa toimivilta esimiehiltä (n=372). Vastausprosentti oli 36. Taustamuuttujien perusteella määräytyvien vastaajaryhmien väliset erot analysoitiin riippumattomien otosten t-testillä sekä yksisuuntaisella varianssianalyysillä. Vanhempien osallisuuden tukeminen toteutui kohtuullisesti kaikissa toimipisteissä. Merkittävimmät kehittämiskohteet liittyivät perheiden riittämättömään tiedonsaantiin, heikkoihin palautteen antomahdollisuuksin, perheen ja henkilöstön välisen yhteistyön sekä vanhempien yhteiskunnallisten vaikutusmahdollisuuksien lisäämiseen. Vastaajien mukaan toimipisteen luonne oli yhteydessä osallisuuden tukemiseen päihdepalveluissa."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::5115f8bae044b12a72b0741673c66fcb","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/sla/article/view/47238"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"","issnPrinted":"0355-5097","name":"Sosiaalilääketieteellinen Aikakauslehti","sp":"","vol":"51"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720434259,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2015-07-02T10:20:48Z","harvestDate":"2020-08-27T13:08:26.705Z","identifier":"oai:journal.fi:article/47238","metadataNamespace":""}},"originalId":["oai:journal.fi:article/47238"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteen yhdistys ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteellinen Aikakauslehti"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Päihdepalvelujen piirissä olevien vanhempien osallisuuden tukeminen"}]} +{"author":[{"fullname":"Ritari, Katja","name":"Katja","pid":[],"rank":1,"surname":"Ritari"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:21.371Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::72070913a49aa49d3b5abc600f940893","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/scf/article/view/77169"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2242-4261","issnPrinted":"1795-097X","name":"Studia Celtica Fennica","sp":"","vol":"14"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720459568,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2018-12-05T13:07:12Z","harvestDate":"2020-08-27T14:57:55.374Z","identifier":"oai:journal.fi:article/77169","metadataNamespace":""}},"originalId":["oai:journal.fi:article/77169"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Finnish Society for Celtic Studies SFKS ry."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Studia Celtica Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Editorial"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Editorial"}]} +{"author":[{"fullname":"Hoffrén, Jukka","name":"Jukka","pid":[],"rank":1,"surname":"Hoffrén"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:16.036Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Suomalaista hyvinvointiyhteiskuntaa pidettiin pitkään koko kansakuntaa yhdistävänä menestystarinana. Hyvinvoinnin huippukohta saavutettiin 1990-luvun alussa, ja sen jälkeen tarina on saanut entistä enemmän säröjä. Uusien mittareiden mukaan suomalaisten hyvinvointi on polkenut paikallaan tai jopa alentunut, vaikka ruttokansantuotteella (BKT) mitattu talouskasvu onkin saatu jatkumaan voimakkaana. Suurimpia syitä hyvinvoinnin laskuun ovat tuloerojen kasvaminen, talouden ympäristöön kasautuvan kuormituksen kasvu sekä luonnonvarojen kiihtyvä kulutus. Jälkiteolliseen yhteiskuntaan siirtyminen muuttaa tuotanto- ja elämäntapoja sekä rikkoo aiempia uskomuksia perinteisen talouskasvun siunauksellisuudesta yhteiskunnalliselle kehitykselle."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::7709c0dd641ca56ada58c9378e156648","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/5022"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":"30"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720758508,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:32:18.573Z","identifier":"oai:journal.fi:article/5022","metadataNamespace":""}},"originalId":["oai:journal.fi:article/5022"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Suomalaisen hyvinvoinnin tarina"}]} +{"author":[{"fullname":"Siivonen, Katriina","name":"Katriina","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6302-1146"}],"rank":1,"surname":"Siivonen"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:57.958Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Yrsa Lindqvist (ed.) 2008. Tradition och turism på Åland. Att använda kulturarven. (Tradition and Tourism in Åland.) Meddelanden från Folkkultursarkivet 21. Skrifter utgivna av Svenska litteratursällskapet i Finland 711. Helsingfors: Svenska litteratursällskapet i Finland. 240 pp. III. ISBN 978-951-583-167-5."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::a974b7e5144d11e293162c96ff33a4f0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/ethnolfenn/article/view/65995"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"101","iss":"","issnLinking":"","issnOnline":"2489-4982","issnPrinted":"0355-1776","name":"Ethnologia Fennica","sp":"100","vol":"36"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720502596,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2017-09-29T12:07:21Z","harvestDate":"2020-08-27T15:37:26.591Z","identifier":"oai:journal.fi:article/65995","metadataNamespace":""}},"originalId":["oai:journal.fi:article/65995"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnos ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnologia Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Book Reviews"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cultural Heritage in Use in Åland"}]} +{"author":[{"fullname":"Portin, Petter","name":"Petter","pid":[],"rank":1,"surname":"Portin"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:50.546Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvostelu: Worldwatch-instituutti: Maailman tila 2008. Kestävä talous. Raportti kehityksestä kohti kestävää yhteiskuntaa. Suomentanut Jani Kaaro. Gaudeamus 2008."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::ad462fe2a7230b480118e7d8d37476d5","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/490"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":""},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720348067,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:28:00.546Z","identifier":"oai:journal.fi:article/490","metadataNamespace":""}},"originalId":["oai:journal.fi:article/490"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuus"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Ilmastonmuutos – vakava markkinahäiriö"}]} +{"author":[{"fullname":"Klaus Ostermann","pid":[],"rank":1},{"fullname":"Paolo Giarrusso","pid":[],"rank":2},{"fullname":"Christian Kästner","name":"Christian K.","pid":[],"rank":3,"surname":"Stner"},{"fullname":"Tillmann Rendel","pid":[],"rank":4}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"dateofcollection":"2015-01-20T00:00:00Z","dateoftransformation":"2016-03-12T12:49:39.741Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"distributionlocation":"","hostedby":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dl.acm.org/citation.cfm?id=2032509"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720265881,"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":""}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Revisiting information hiding"}]} +{"author":[{"fullname":"Hernandez Lopezomoza, Mario Andres","name":"Mario Andres","pid":[],"rank":1,"surname":"Hernandez Lopezomoza"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Institut Supérieur de l'Aéronautique et de l'Espace"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Biannic, Jean-Marc"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Jouhaud, Frank"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"dateofcollection":"2016-02-26T12:03:21.28Z","dateoftransformation":"2020-08-15T08:01:27.526Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ce travail de thèse est consacré à l'extension de l'Inversion Dynamique non-linéaire (NDI-Nonlinear Dynamic Inversion) pour un ensemble plus grand de systèmes non-linéaires, tout en garantissant des conditions de stabilité suffisantes. La NDI a été étudiée dans le cas de diverses applications, y compris en aéronautique et en aérospatiale. Elle permet de calculer des lois de contrôle capables de linéariser et de découpler un modèle non-linéaire à tout point de fonctionnement de son enveloppe d'état. Cependant cette méthode est intrinsèquement non-robuste aux erreurs de modélisation et aux saturations en entrée. En outre, dans un contexte non-linéaire, l'obtention d'une garantie quantifiable du domaine de stabilité atteint reste à l'heure actuelle complexe. Contrairement aux approches classiques de la NDI, notre méthodologie peut être considérée comme un cadre de compensation non-linéaire généralisé qui permet d'intégrer les incertitudes et les saturations en entrée dans le processus de conception. En utilisant des stratégies de contrôle antiwindup, la loi de pilotage peut être calculée grâce à un simple processus en deux phases. Dans ce cadre de travail généralisé des transformations linéaires fractionnaires (LFT - Linear Fractional Transformations) de la boucle fermée non-linéaire peuvent être facilement déduites pour l'analyse de la stabilité robuste en utilisant des outils standards pour de systèmes linéaires. La méthode proposée est testée pour le pilotage d'un véhicule de rentrée atmosphérique de type aile delta lors de ses phases hypersonique, transsonique et subsonique. Pour cette thèse, un simulateur du vol incluant divers facteurs externes ainsi que des erreurs de modélisation a été développé dans Simulink.\n\nThis thesis work is devoted to extending Nonlinear Dynamic Inversion (NDI) for a large scale of\nnonlinear systems while guaranteeing sufficient stability conditions. NDI has been studied in a wide range of applications, including aeronautics and aerospace. It allows to compute nonlinear control laws able to decouple and linearize a model at any operating point of its state envelope. However, this method is inherently non-robust to modelling errors and input saturations. Moreover, obtaining a quantifiable guarantee of the attained stability domain in a nonlinear control context is not a very straightforward task. Unlike standard NDI approaches, our methodology can be viewed as a generalized nonlinear compensation framework which allows to incorporate uncertainties and input saturations in the design process. Paralleling anti-windup strategies, the controller can be computed through a single multichannel optimization problem or through a simple two-step process. Within this framework, linear fractional transformations of the nonlinear closed-loop can be easily derived for robust stability analysis using standard tools for linear systems. The proposed method is tested for the flight control of a delta wing type reentry vehicle at hypersonic, transonic and subsonic phases of the atmospheric reentry. For this thesis work, a Flight Mechanics simulator including diverse external factors and modelling errors was developed in Simulink."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|artemis___fr::630e47d8b572e3df0e91327d6d8f036d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"distributionlocation":"","hostedby":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://depozit.isae.fr/theses/2012/2012_Hernandez_Lopezomoza_Mario_Andres.pdf"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720142745,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http://ori-oai.isae.fr/ori-oai-repository/OAIHandler","datestamp":"2013-06-13","harvestDate":"2016-02-26T12:03:21.28Z","identifier":"oai:isae-repo.fr:isae-371","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:isae-repo.fr:isae-371"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Compensation non-linéaire généralisée"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Inversion dynamic non-linéaire"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande anti-windup"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande robuste"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Transformations linéaires fractionnaires"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande H-infinie non-lisse"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Generalized nonlinear compensation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Nonlinear dynamic inversion"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Anti-windup control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Robust control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Linear fractional transformation, Nonsmooth H-infinity control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Atmospheric reentry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"629.8"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cadre de travail généralisé de compensation non-linéaire robuste : application à la rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A generalized framework for robust nonlinear compensation : application to an atmospheric reentry control problem"}]} \ No newline at end of file From aff3ddc8d25932f802c1871afa858ecfa20c61d7 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 14 Dec 2021 11:41:46 +0100 Subject: [PATCH 089/176] added cleaning for the format field, removing carrige return and tab characters --- .../dhp/schema/oaf/utils/GraphCleaningFunctions.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 42e19628c..37f5573d0 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -230,6 +230,15 @@ public class GraphCleaningFunctions extends CleaningFunctions { .map(GraphCleaningFunctions::cleanValue) .collect(Collectors.toList())); } + if (Objects.nonNull(r.getFormat())) { + r + .setFormat( + r + .getFormat() + .stream() + .map(GraphCleaningFunctions::cleanValue) + .collect(Collectors.toList())); + } if (Objects.nonNull(r.getDescription())) { r .setDescription( From 6fb6236cd485b869eb23e87c4ceac5e3df3ecb4b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 14 Dec 2021 14:51:14 +0100 Subject: [PATCH 090/176] changed the way to produce the AS for bipFinder. --- .../bipfinder/CollectAndSave.java | 86 ----------- .../bipfinder/SparkAtomicActionScoreJob.java | 36 +---- .../bipfinder/oozie_app/workflow.xml | 137 +++--------------- 3 files changed, 27 insertions(+), 232 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java deleted file mode 100644 index a48b84a33..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java +++ /dev/null @@ -1,86 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.bipfinder; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.Objects; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.SequenceFileOutputFormat; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.oaf.Result; - -/** - * Just collects all the atomic actions produced for the different results and saves them in - * outputpath for the ActionSet - */ -public class CollectAndSave implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class); - - public static void main(String[] args) throws Exception { - - String jsonConfiguration = IOUtils - .toString( - Objects - .requireNonNull( - CollectAndSave.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json"))); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("inputPath"); - log.info("inputPath {}: ", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath {}: ", outputPath); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - collectAndSave(spark, inputPath, outputPath); - }); - } - - private static void collectAndSave(SparkSession spark, String inputPath, String outputPath) { - JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - sc - .sequenceFile(inputPath + "/publication", Text.class, Text.class) - .union(sc.sequenceFile(inputPath + "/dataset", Text.class, Text.class)) - .union(sc.sequenceFile(inputPath + "/otherresearchproduct", Text.class, Text.class)) - .union(sc.sequenceFile(inputPath + "/software", Text.class, Text.class)) - .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); - } - - private static void removeOutputDir(SparkSession spark, String path) { - HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index d4dce6ae4..8cda19d07 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -50,7 +50,7 @@ public class SparkAtomicActionScoreJob implements Serializable { .toString( SparkAtomicActionScoreJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json")); + "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -69,14 +69,6 @@ public class SparkAtomicActionScoreJob implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath {}: ", outputPath); - final String bipScorePath = parser.get("bipScorePath"); - log.info("bipScorePath: {}", bipScorePath); - - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - Class inputClazz = (Class) Class.forName(resultClassName); - SparkConf conf = new SparkConf(); runWithSparkSession( @@ -84,12 +76,11 @@ public class SparkAtomicActionScoreJob implements Serializable { isSparkSessionManaged, spark -> { removeOutputDir(spark, outputPath); - prepareResults(spark, inputPath, outputPath, bipScorePath, inputClazz); + prepareResults(spark, inputPath, outputPath); }); } - private static void prepareResults(SparkSession spark, String inputPath, String outputPath, - String bipScorePath, Class inputClazz) { + private static void prepareResults(SparkSession spark, String bipScorePath, String outputPath) { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -105,26 +96,7 @@ public class SparkAtomicActionScoreJob implements Serializable { return bs; }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)); - Dataset results = readPath(spark, inputPath, inputClazz); - - results.createOrReplaceTempView("result"); - - Dataset preparedResult = spark - .sql( - "select id " + - "from result " + - - "where dataInfo.deletedbyinference = false "); - bipScores - .joinWith( - preparedResult, bipScores.col("id").equalTo(preparedResult.col("id")), - "inner") - .map((MapFunction, BipScore>) value -> { - BipScore ret = value._1(); - ret.setId(value._1().getId()); - return ret; - }, Encoders.bean(BipScore.class)) .map((MapFunction) bs -> { Result ret = new Result(); @@ -136,7 +108,7 @@ public class SparkAtomicActionScoreJob implements Serializable { return ret; }, Encoders.bean(Result.class)) .toJavaRDD() - .map(p -> new AtomicAction(inputClazz, p)) + .map(p -> new AtomicAction(Result.class, p)) .mapToPair( aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), new Text(OBJECT_MAPPER.writeValueAsString(aa)))) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml index eef429087..d2fe6712c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml @@ -49,6 +49,25 @@ + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -64,14 +83,8 @@
- - - - - - - + yarn cluster @@ -88,118 +101,14 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --inputPath${inputPath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/publication - --bipScorePath${bipScorePath} - - - - - - - - yarn - cluster - Produces the atomic action with the bip finder scores for datasets - eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob - dhp-aggregation-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --inputPath${inputPath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/dataset - --bipScorePath${bipScorePath} - - - - - - - - yarn - cluster - Produces the atomic action with the bip finder scores for orp - eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob - dhp-aggregation-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --inputPath${inputPath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/otherresearchproduct - --bipScorePath${bipScorePath} - - - - - - - - yarn - cluster - Produces the atomic action with the bip finder scores for software - eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob - dhp-aggregation-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --inputPath${inputPath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/software - --bipScorePath${bipScorePath} - - - - - - - - - - yarn - cluster - saves all the aa produced for the several types of results in the as output path - eu.dnetlib.dhp.actionmanager.bipfinder.CollectAndSave - dhp-aggregation-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --inputPath${workingDir} + --inputPath${bipScorePath} --outputPath${outputPath} + + \ No newline at end of file From 22d4b5619b71941dfa73042f131176ed1e332869 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 14 Dec 2021 14:54:13 +0100 Subject: [PATCH 091/176] [BipFinder Result] last changes to test and resources files --- .../bipfinder/input_parameters.json | 32 ------------------- .../bipfinder/oozie_app/workflow.xml | 2 +- .../SparkAtomicActionScoreJobTest.java | 6 ++-- 3 files changed, 3 insertions(+), 37 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json deleted file mode 100644 index 31771a40a..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "paramName": "issm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "when true will stop SparkSession after job execution", - "paramRequired": false - }, - { - "paramName": "ip", - "paramLongName": "inputPath", - "paramDescription": "the URL from where to get the programme file", - "paramRequired": true - }, - { - "paramName": "o", - "paramLongName": "outputPath", - "paramDescription": "the path of the new ActionSet", - "paramRequired": true - }, - { - "paramName": "rtn", - "paramLongName": "resultTableName", - "paramDescription": "the path of the new ActionSet", - "paramRequired": true - }, - { - "paramName": "bsp", - "paramLongName": "bipScorePath", - "paramDescription": "the path of the new ActionSet", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml index d2fe6712c..5b97a3d1a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml @@ -108,7 +108,7 @@ - + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java index f040ee836..c4a84cb46 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java @@ -85,11 +85,9 @@ public class SparkAtomicActionScoreJobTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-inputPath", - inputPath, - "-bipScorePath", + bipScoresPath, - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Publication", + "-outputPath", workingDir.toString() + "/actionSet" }); From e5bff64f2e34355a7e9ce2ba1c3b49b517e8211e Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 14 Dec 2021 09:56:55 +0100 Subject: [PATCH 092/176] [scholexplorer] - Minor fix on SparkConvertRDDtoDataset -first implementation of retrieve datacite dump --- .../sx/graph/SparkRetrieveDataciteDelta.scala | 54 +++++++++++++++++++ .../graph/finalGraph/oozie_app/workflow.xml | 2 +- .../sx/graph/SparkConvertRDDtoDataset.scala | 28 +++++----- 3 files changed, 69 insertions(+), 15 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala new file mode 100644 index 000000000..62f53e4ad --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala @@ -0,0 +1,54 @@ +package eu.dnetlib.dhp.sx.graph + +import eu.dnetlib.dhp.application.AbstractScalaApplication +import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} +import org.apache.spark.sql.functions.max +import org.slf4j.Logger + +class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:Logger) extends AbstractScalaApplication(propertyPath, args, log:Logger) { + + + def retrieveLastCollectedFrom(spark:SparkSession, entitiesPath:String):String = { + log.info("Retrieve last entities collected From") + + implicit val oafEncoder:Encoder[Result] = Encoders.kryo[Result] + import spark.implicits._ + + val entitiesDS = spark.read.load(s"$entitiesPath/*").as[Result] + + entitiesDS.filter(r => r.getDateofcollection!= null).map(_.getDateofcollection).select(max("value")).first.getString(0) + + + + } + + + /** + * Here all the spark applications runs this method + * where the whole logic of the spark node is defined + */ + override def run(): Unit = { + val sourcePath = parser.get("sourcePath") + log.info(s"SourcePath is '$sourcePath'") + + val datacitePath = parser.get("datacitePath") + log.info(s"DatacitePath is '$datacitePath'") + + + log.info("Retrieve last entities collected From") + + implicit val oafEncoder:Encoder[Result] = Encoders.kryo[Result] + + val lastCollectionDate = retrieveLastCollectedFrom(spark, s"$sourcePath/entities") + + + + + + + + + + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml index 17996c82c..85c0d486d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml @@ -79,7 +79,7 @@ --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=20000 + --conf spark.sql.shuffle.partitions=30000 --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 23f039c70..2115df1fd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -2,12 +2,11 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Software,Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} - object SparkConvertRDDtoDataset { def main(args: Array[String]): Unit = { @@ -32,39 +31,40 @@ object SparkConvertRDDtoDataset { val entityPath = s"$t/entities" val relPath = s"$t/relation" val mapper = new ObjectMapper() - implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) - implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) - implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) - implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) - implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) + implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) + implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) + implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) + implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) + implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) log.info("Converting dataset") - val rddDataset = spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])) + val rddDataset =spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])).filter(r=> r.getDataInfo!= null && r.getDataInfo.getDeletedbyinference == false) spark.createDataset(rddDataset).as[OafDataset].write.mode(SaveMode.Overwrite).save(s"$entityPath/dataset") log.info("Converting publication") - val rddPublication = spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])) + val rddPublication =spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])).filter(r=> r.getDataInfo!= null && r.getDataInfo.getDeletedbyinference == false) spark.createDataset(rddPublication).as[Publication].write.mode(SaveMode.Overwrite).save(s"$entityPath/publication") log.info("Converting software") - val rddSoftware = spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])) + val rddSoftware =spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])).filter(r=> r.getDataInfo!= null && r.getDataInfo.getDeletedbyinference == false) spark.createDataset(rddSoftware).as[Software].write.mode(SaveMode.Overwrite).save(s"$entityPath/software") log.info("Converting otherresearchproduct") - val rddOtherResearchProduct = spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])) + val rddOtherResearchProduct =spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])).filter(r=> r.getDataInfo!= null && r.getDataInfo.getDeletedbyinference == false) spark.createDataset(rddOtherResearchProduct).as[OtherResearchProduct].write.mode(SaveMode.Overwrite).save(s"$entityPath/otherresearchproduct") log.info("Converting Relation") - val relationSemanticFilter = List("cites", "iscitedby", "merges", "ismergedin") + val relationSemanticFilter = List("cites", "iscitedby","merges", "ismergedin", "HasAmongTopNSimilarDocuments","IsAmongTopNSimilarDocuments" ) - val rddRelation = spark.sparkContext.textFile(s"$sourcePath/relation") + val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation") .map(s => mapper.readValue(s, classOf[Relation])) - .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) + .filter(r=> r.getDataInfo!= null && r.getDataInfo.getDeletedbyinference == false) + .filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50")) .filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") From 63952018c09dd5f1c43f5a4323b2d3fdb2a7ee17 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 14 Dec 2021 09:58:50 +0100 Subject: [PATCH 093/176] [scholexplorer] -moved SparkRetrieveDataciteDelta in scala folder --- .../eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala (96%) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala similarity index 96% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala index 62f53e4ad..8db271941 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala @@ -1,9 +1,9 @@ package eu.dnetlib.dhp.sx.graph import eu.dnetlib.dhp.application.AbstractScalaApplication -import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} -import org.apache.spark.sql.{Encoder, Encoders, SparkSession} +import eu.dnetlib.dhp.schema.oaf.Result import org.apache.spark.sql.functions.max +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} import org.slf4j.Logger class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:Logger) extends AbstractScalaApplication(propertyPath, args, log:Logger) { From b881ee5ef8b0f12dfaa25e448e34b1f0e6853c54 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 15 Dec 2021 11:25:23 +0100 Subject: [PATCH 094/176] [scholexplorer] - implemented generation of scholix of delta update of datacite --- .../dhp/sx/graph/scholix/ScholixUtils.scala | 57 +++- .../sx/graph/SparkRetrieveDataciteDelta.scala | 291 ++++++++++++++++++ .../sx/graph/SparkRetrieveDataciteDelta.scala | 54 ---- .../sx/graph/RetrieveDataciteDeltaTest.scala | 26 ++ 4 files changed, 364 insertions(+), 64 deletions(-) rename {dhp-workflows/dhp-graph-mapper => dhp-common}/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala (88%) create mode 100644 dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/RetrieveDataciteDeltaTest.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala similarity index 88% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala rename to dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala index bf81a26d4..f8010bbd5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala @@ -53,8 +53,6 @@ object ScholixUtils { else { summary.getDate.get(0) } - - } def inverseRelationShip(rel: ScholixRelationship): ScholixRelationship = { @@ -68,7 +66,6 @@ object ScholixUtils { override def zero: RelatedEntities = null override def reduce(b: RelatedEntities, a: (String, String, Long)): RelatedEntities = { - val id = a._1 val relatedDataset = if ("dataset".equalsIgnoreCase(a._2)) a._3 else 0 val relatedPublication = if ("publication".equalsIgnoreCase(a._2)) a._3 else 0 @@ -142,6 +139,14 @@ object ScholixUtils { } + def extractCollectedFrom(summary: ScholixResource): List[ScholixEntityId] = { + if (summary.getCollectedFrom != null && !summary.getCollectedFrom.isEmpty) { + val l: List[ScholixEntityId] = summary.getCollectedFrom.asScala.map { + d => new ScholixEntityId(d.getProvider.getName, d.getProvider.getIdentifiers) + }(collection.breakOut) + l + } else List() + } def extractCollectedFrom(summary: ScholixSummary): List[ScholixEntityId] = { if (summary.getDatasources != null && !summary.getDatasources.isEmpty) { @@ -160,7 +165,7 @@ object ScholixUtils { c => new ScholixEntityId(c.getValue, List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA, null)).asJava) - }(collection breakOut) + }.toList l } else List() } @@ -222,6 +227,38 @@ object ScholixUtils { } + + def scholixFromSource(relation: Relation, source: ScholixResource):Scholix = { + if (relation == null || source == null) + return null + val s = new Scholix + var l: List[ScholixEntityId] = extractCollectedFrom(relation) + if (l.isEmpty) + l = extractCollectedFrom(source) + if (l.isEmpty) + return null + s.setLinkprovider(l.asJava) + var d = extractRelationDate(relation) + if (d == null) + d = source.getPublicationDate + + s.setPublicationDate(d) + + + if (source.getPublisher != null && !source.getPublisher.isEmpty) { + s.setPublisher(source.getPublisher) + } + + val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null) + if (semanticRelation == null) + return null + s.setRelationship(new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse)) + s.setSource(source) + + s + } + + def scholixFromSource(relation: Relation, source: ScholixSummary): Scholix = { if (relation == null || source == null) @@ -303,7 +340,7 @@ object ScholixUtils { s.setSubType(r.getInstance().get(0).getInstancetype.getClassname) if (r.getTitle != null && r.getTitle.asScala.nonEmpty) { - val titles: List[String] = r.getTitle.asScala.map(t => t.getValue)(collection breakOut) + val titles: List[String] = r.getTitle.asScala.map(t => t.getValue).toList if (titles.nonEmpty) s.setTitle(titles.asJava) else @@ -311,12 +348,12 @@ object ScholixUtils { } if (r.getAuthor != null && !r.getAuthor.isEmpty) { - val authors: List[String] = r.getAuthor.asScala.map(a => a.getFullname)(collection breakOut) - if (authors nonEmpty) + val authors: List[String] = r.getAuthor.asScala.map(a => a.getFullname).toList + if (authors.nonEmpty) s.setAuthor(authors.asJava) } if (r.getInstance() != null) { - val dt: List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut) + val dt: List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue).toList if (dt.nonEmpty) s.setDate(dt.distinct.asJava) } @@ -327,7 +364,7 @@ object ScholixUtils { } if (r.getSubject != null && !r.getSubject.isEmpty) { - val subjects: List[SchemeValue] = r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut) + val subjects: List[SchemeValue] = r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue)).toList if (subjects.nonEmpty) s.setSubject(subjects.asJava) } @@ -336,7 +373,7 @@ object ScholixUtils { s.setPublisher(List(r.getPublisher.getValue).asJava) if (r.getCollectedfrom != null && !r.getCollectedfrom.isEmpty) { - val cf: List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut) + val cf: List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete")).toList if (cf.nonEmpty) s.setDatasources(cf.distinct.asJava) } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala new file mode 100644 index 000000000..7f37829ee --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala @@ -0,0 +1,291 @@ +package eu.dnetlib.dhp.sx.graph + +import eu.dnetlib.dhp.application.AbstractScalaApplication +import eu.dnetlib.dhp.collection.CollectionUtils.fixRelations +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup +import eu.dnetlib.dhp.datacite.{DataciteToOAFTransformation, DataciteType} +import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} +import eu.dnetlib.dhp.schema.sx.scholix.{Scholix, ScholixResource} +import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary +import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils +import eu.dnetlib.dhp.utils.{DHPUtils, ISLookupClientFactory} +import org.apache.spark.sql.functions.max +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.slf4j.Logger + +import scala.collection.JavaConverters._ +import java.text.SimpleDateFormat + +class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:Logger) extends AbstractScalaApplication(propertyPath, args, log:Logger) { + + val ISO_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ssZ" + val simpleFormatter = new SimpleDateFormat(ISO_DATE_PATTERN) + + val SCHOLIX_RESOURCE_PATH_NAME = "scholixResource" + val DATACITE_OAF_PATH_NAME = "dataciteOAFUpdate" + val PID_MAP_PATH_NAME = "pidMap" + val RESOLVED_REL_PATH_NAME ="resolvedRelation" + val SCHOLIX_PATH_NAME = "scholix" + + + def scholixResourcePath(workingPath:String) = s"$workingPath/$SCHOLIX_RESOURCE_PATH_NAME" + def dataciteOAFPath(workingPath:String) = s"$workingPath/$DATACITE_OAF_PATH_NAME" + def pidMapPath(workingPath:String) = s"$workingPath/$PID_MAP_PATH_NAME" + def resolvedRelationPath(workingPath:String) = s"$workingPath/$RESOLVED_REL_PATH_NAME" + def scholixPath(workingPath:String) = s"$workingPath/$SCHOLIX_PATH_NAME" + + + /** + * Utility to parse Date in ISO8601 to epochMillis + * @param inputDate The String represents an input date in ISO8601 + * @return The relative epochMillis of parsed date + */ + def ISO8601toEpochMillis(inputDate:String):Long = { + simpleFormatter.parse(inputDate).getTime + } + + + /** + * This method tries to retrieve the last collection date from all datacite + * records in HDFS. + * This method should be called before indexing scholexplorer to retrieve + * the delta of Datacite record to download, since from the generation of + * raw graph to the generation of Scholexplorer sometimes it takes 20 days + * @param spark + * @param entitiesPath + * @return the last collection date from the current scholexplorer Graph of the datacite records + */ + def retrieveLastCollectedFrom(spark:SparkSession, entitiesPath:String):Long = { + log.info("Retrieve last entities collected From") + + implicit val oafEncoder:Encoder[Result] = Encoders.kryo[Result] + import spark.implicits._ + + val entitiesDS = spark.read.load(s"$entitiesPath/*").as[Result] + + val date = entitiesDS.filter(r => r.getDateofcollection!= null).map(_.getDateofcollection).select(max("value")).first.getString(0) + + ISO8601toEpochMillis(date) + } + + + /** + * The method of update Datacite relationships on Scholexplorer + * needs some utilities data structures + * One is the scholixResource DS that stores all the nodes in the Scholix Graph + * in format (dnetID, ScholixResource ) + * @param summaryPath the path of the summary in Scholix + * @param workingPath the working path + * @param spark the spark session + */ + def generateScholixResource(summaryPath:String, workingPath: String, spark:SparkSession) :Unit = { + implicit val summaryEncoder:Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] + implicit val scholixResourceEncoder:Encoder[ScholixResource] = Encoders.kryo[ScholixResource] + + log.info("Convert All summary to ScholixResource") + spark.read.load(summaryPath).as[ScholixSummary] + .map(ScholixUtils.generateScholixResourceFromSummary) + .filter(r => r.getIdentifier!= null && r.getIdentifier.size>0) + .map(r=> (r.getIdentifier,r))(Encoders.tuple(Encoders.STRING, scholixResourceEncoder)) + .write.mode(SaveMode.Overwrite).save(scholixResourcePath(workingPath)) + + } + + + /** + * This method get and Transform only datacite records with + * timestamp greater than timestamp + * @param datacitePath the datacite input Path + * @param timestamp the timestamp + * @param workingPath the working path where save the generated Dataset + * @param spark SparkSession + * @param vocabularies Vocabularies needed for transformation + */ + + def getDataciteUpdate(datacitePath:String, timestamp:Long, workingPath:String, spark:SparkSession,vocabularies: VocabularyGroup): Unit = { + import spark.implicits._ + val ds = spark.read.load(datacitePath).as[DataciteType] + implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] + ds.filter(_.timestamp>=timestamp) + .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks = true)) + .flatMap(i => fixRelations(i)).filter(i => i != null) + .write.mode(SaveMode.Overwrite).save(dataciteOAFPath(workingPath)) + + } + + + /** + * This method convert an Instance of OAF Result into + * Scholix Resource + * @param r The input Result + * @return The Scholix Resource + */ + def resultToScholixResource(r:Result):ScholixResource = { + ScholixUtils.generateScholixResourceFromSummary(ScholixUtils.resultToSummary(r)) + } + + + /** + * After added the new ScholixResource, we need to update the scholix Pid Map + * to intersected with the new Datacite Relations + + * @param workingPath The working Path starting from save the new Map + * @param spark the spark session + */ + def generatePidMap(workingPath:String, spark:SparkSession ) :Unit = { + implicit val scholixResourceEncoder:Encoder[ScholixResource] = Encoders.kryo[ScholixResource] + import spark.implicits._ + spark.read.load(scholixResourcePath(workingPath)).as[(String,ScholixResource)] + .flatMap(r=> + r._2.getIdentifier.asScala + .map(i =>DHPUtils.generateUnresolvedIdentifier(i.getIdentifier, i.getSchema)) + .map((_, r._1)) + )(Encoders.tuple(Encoders.STRING, Encoders.STRING)) + .groupByKey(_._1) + .reduceGroups((a,b) => if (a!= null && a._2!= null) a else b) + .map(_._2)(Encoders.tuple(Encoders.STRING, Encoders.STRING)) + .write.mode(SaveMode.Overwrite).save(pidMapPath(workingPath)) + } + + /** + * This method convert the new Datacite Resource into Scholix Resource + * Needed to fill the source and the type of Scholix Relationships + * @param workingPath the Working Path + * @param spark The spark Session + */ + def addMissingScholixResource(workingPath:String, spark:SparkSession ) :Unit = { + implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val scholixResourceEncoder:Encoder[ScholixResource] = Encoders.kryo[ScholixResource] + implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result] + + spark.read.load(dataciteOAFPath(workingPath)).as[Oaf] + .filter(_.isInstanceOf[Result]) + .map(_.asInstanceOf[Result]) + .map(resultToScholixResource) + .filter(r => r.getIdentifier!= null && r.getIdentifier.size>0) + .map(r=> (r.getIdentifier,r))(Encoders.tuple(Encoders.STRING, scholixResourceEncoder)) + .write.mode(SaveMode.Append).save(scholixResourcePath(workingPath)) + } + + + /** + * This method resolve the datacite relation and filter the resolved + * relation + * @param workingPath the working path + * @param spark the spark session + */ + + def resolveUpdateRelation(workingPath:String, spark:SparkSession) :Unit = { + implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val relationEncoder:Encoder[Relation] = Encoders.kryo[Relation] + import spark.implicits._ + + val pidMap = spark.read.load(pidMapPath(workingPath)).as[(String,String)] + + val unresolvedRelations:Dataset[(String,Relation)] = spark.read.load(dataciteOAFPath(workingPath)).as[Oaf] + .filter(_.isInstanceOf[Relation]) + .map(_.asInstanceOf[Relation]) + .map { r => + if (r.getSource.startsWith("unresolved")) + (r.getSource, r) + else + (r.getTarget, r) + }(Encoders.tuple(Encoders.STRING, relationEncoder)) + + unresolvedRelations + .joinWith(pidMap, unresolvedRelations("_1").equalTo(pidMap("_1"))) + .map(t => { + val r =t._1._2 + val resolvedIdentifier = t._2._2 + if (r.getSource.startsWith("unresolved")) + r.setSource(resolvedIdentifier) + else + r.setTarget(resolvedIdentifier) + r + })(relationEncoder) + .filter(r => !(r.getSource.startsWith("unresolved") || r.getTarget.startsWith("unresolved") )) + .write.mode(SaveMode.Overwrite) + .save(resolvedRelationPath(workingPath)) + } + + + + /** + * This method generate scholix starting from resolved relation + * + * + * @param workingPath + * @param spark + */ + def generateScholixUpdate(workingPath:String, spark:SparkSession) :Unit = { + implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val scholixEncoder:Encoder[Scholix] = Encoders.kryo[Scholix] + implicit val scholixResourceEncoder:Encoder[ScholixResource] = Encoders.kryo[ScholixResource] + implicit val relationEncoder:Encoder[Relation] = Encoders.kryo[Relation] + import spark.implicits._ + + val relationss:Dataset[(String, Relation)] = spark.read.load(s"$workingPath/ResolvedRelation").as[Relation].map(r =>(r.getSource,r))(Encoders.tuple(Encoders.STRING, relationEncoder)) + + val id_summary:Dataset[(String,ScholixResource)] = spark.read.load(scholixResourcePath(workingPath)).as[(String,ScholixResource)] + + relationss.joinWith(id_summary, relationss("_1").equalTo(id_summary("_1")),"inner") + .map(t => (t._1._2.getTarget,ScholixUtils.scholixFromSource(t._1._2, t._2._2)))(Encoders.tuple(Encoders.STRING, scholixEncoder)) + .write.mode(SaveMode.Overwrite) + .save(s"$workingPath/scholix_one_verse") + + val source_scholix:Dataset[(String,Scholix)] = spark.read.load(s"$workingPath/scholix_one_verse").as[(String,Scholix)] + + source_scholix.joinWith(id_summary, source_scholix("_1").equalTo(id_summary("_1")),"inner") + .map(t => { + val target = t._2._2 + val scholix = t._1._2 + scholix.setTarget(target) + scholix + })(scholixEncoder).write.mode(SaveMode.Overwrite).save(s"$workingPath/scholix") + } + + + + + + /** + * Here all the spark applications runs this method + * where the whole logic of the spark node is defined + */ + override def run(): Unit = { + val sourcePath = parser.get("sourcePath") + log.info(s"SourcePath is '$sourcePath'") + + val datacitePath = parser.get("datacitePath") + log.info(s"DatacitePath is '$datacitePath'") + + val workingPath = parser.get("workingPath") + log.info(s"workingPath is '$workingPath'") + + val isLookupUrl: String = parser.get("isLookupUrl") + log.info("isLookupUrl: {}", isLookupUrl) + + val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) + val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) + require(vocabularies != null) + + + val updateDS:Boolean = "true".equalsIgnoreCase(parser.get("updateDS")) + log.info(s"updateDS is '$updateDS'") + + var lastCollectionDate = 0L + if (updateDS) { + generateScholixResource(s"$sourcePath/provision/summaries", workingPath, spark) + log.info("Retrieve last entities collected From starting from scholix Graph") + lastCollectionDate = retrieveLastCollectedFrom(spark, s"$sourcePath/entities") + } + + getDataciteUpdate(datacitePath, lastCollectionDate, workingPath, spark, vocabularies) + addMissingScholixResource(workingPath,spark) + generatePidMap(workingPath, spark) + resolveUpdateRelation(workingPath,spark) + generateScholixUpdate(workingPath, spark) + + + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala deleted file mode 100644 index 8db271941..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala +++ /dev/null @@ -1,54 +0,0 @@ -package eu.dnetlib.dhp.sx.graph - -import eu.dnetlib.dhp.application.AbstractScalaApplication -import eu.dnetlib.dhp.schema.oaf.Result -import org.apache.spark.sql.functions.max -import org.apache.spark.sql.{Encoder, Encoders, SparkSession} -import org.slf4j.Logger - -class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:Logger) extends AbstractScalaApplication(propertyPath, args, log:Logger) { - - - def retrieveLastCollectedFrom(spark:SparkSession, entitiesPath:String):String = { - log.info("Retrieve last entities collected From") - - implicit val oafEncoder:Encoder[Result] = Encoders.kryo[Result] - import spark.implicits._ - - val entitiesDS = spark.read.load(s"$entitiesPath/*").as[Result] - - entitiesDS.filter(r => r.getDateofcollection!= null).map(_.getDateofcollection).select(max("value")).first.getString(0) - - - - } - - - /** - * Here all the spark applications runs this method - * where the whole logic of the spark node is defined - */ - override def run(): Unit = { - val sourcePath = parser.get("sourcePath") - log.info(s"SourcePath is '$sourcePath'") - - val datacitePath = parser.get("datacitePath") - log.info(s"DatacitePath is '$datacitePath'") - - - log.info("Retrieve last entities collected From") - - implicit val oafEncoder:Encoder[Result] = Encoders.kryo[Result] - - val lastCollectionDate = retrieveLastCollectedFrom(spark, s"$sourcePath/entities") - - - - - - - - - - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/RetrieveDataciteDeltaTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/RetrieveDataciteDeltaTest.scala new file mode 100644 index 000000000..c277b0aa1 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/RetrieveDataciteDeltaTest.scala @@ -0,0 +1,26 @@ +package eu.dnetlib.dhp.sx.graph +import org.junit.jupiter.api.Test + +import java.text.SimpleDateFormat + + + +class RetrieveDataciteDeltaTest { + + @Test + def testParsingDate(): Unit = { + + + val inputDate = "2021-12-02T11:17:36+0000" + + val t = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ").parse(inputDate).getTime + + + println(t) + + + + } + + +} From c1cdec09a9b1db420c8cd1bf056bff0c19362e21 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Mon, 20 Dec 2021 19:23:57 +0200 Subject: [PATCH 095/176] Sprint 5 and other changes --- .../scripts/step16-createIndicatorsTables.sql | 144 ++++++++++-------- 1 file changed, 83 insertions(+), 61 deletions(-) mode change 100644 => 100755 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql old mode 100644 new mode 100755 index 926c8825f..9c67da883 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -1,28 +1,29 @@ +---- Sprint 1 ---- create table indi_pub_green_oa stored as parquet as -select distinct p.id, coalesce(green_oa, 0) as green_oa +select distinct p.id, coalesce(green_oa, 0) as green_oa from publication p -left outer join ( -select p.id, 1 as green_oa +left outer join ( +select p.id, 1 as green_oa from publication p join result_instance ri on ri.id = p.id join datasource on datasource.id = ri.hostedby where datasource.type like '%Repository%' -and (ri.accessright = 'Open Access' -or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp +and (ri.accessright = 'Open Access' +or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp on p.id= tmp.id; create table indi_pub_grey_lit stored as parquet as select distinct p.id, coalesce(grey_lit, 0) as grey_lit from publication p left outer join ( -select p.id, 1 as grey_lit +select p.id, 1 as grey_lit from publication p join result_classifications rt on rt.id = p.id -where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and +where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and not exists (select 1 from result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id; create table indi_pub_doi_from_crossref stored as parquet as -select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref +select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref from publication p left outer join (select ri.id, 1 as doi_from_crossref from result_instance ri @@ -33,7 +34,7 @@ on tmp.id=p.id; create table indi_pub_gold_oa stored as parquet as select distinct p.id, coalesce(gold_oa, 0) as gold_oa from publication p -left outer join ( +left outer join ( select p.id, 1 as gold_oa from publication p join result_instance ri on ri.id = p.id @@ -214,82 +215,85 @@ on p.id= tmp.id; --select year, type, round(no_of_pubs/total*100,3) averageOfPubs --from total; -create table indi_pub_has_cc_licence stored as parquet as -select distinct p.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license -from publication p -left outer join (select p.id, license.type as lic from publication p -join publication_licenses as license on license.id = p.id +---- Sprint 2 ---- +create table indi_result_has_cc_licence_f stored as parquet as +select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license +from result r +left outer join (select r.id, license.type as lic from result r +join result_licenses as license on license.id = r.id where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp -on p.id= tmp.id; +on r.id= tmp.id; -create table indi_pub_has_cc_licence_url stored as parquet as -select distinct p.id, (case when lic_host='' or lic_host is null then 0 else 1 end) as has_cc_license_url -from publication p -left outer join (select p.id, lower(parse_url(license.type, "HOST")) as lic_host -from publication p -join publication_licenses as license on license.id = p.id -WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp -on p.id= tmp.id; +create table indi_result_has_cc_licence_url stored as parquet as +select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url +from result r +left outer join (select r.id, lower(parse_url(license.type, "HOST")) as lic_host +from result r +join result_licenses as license on license.id = r.id +WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp +on r.id= tmp.id; -- EOSC-TR1.1-02M: -- ## Indicator: has_cc_license. Creative Commons licensing has become a -- de facto standard in scholarly communication and is promoted by many initiatives --- like Plan S. This indicator might be only useful when applied +-- like Plan S. This indicator might be only useful when applied -- to openly available publications. ---create table indi_pub_has_cc_licence_tr stored as parquet as ---select distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_tr ---from publication p +--create table indi_pub_has_cc_licence_tr stored as parquet as +--select distinct p.id, case when lic='' or lic is null then 0 else 1 end indi_result_org_collabas has_cc_license_tr +--from publication p --left outer join (select p.id, license.type as lic from publication p ---join publication_licenses as license on license.id = p.id ---where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +--join publication_licenses as license on license.id = p.id +--where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp --on p.id= tmp.id -- #EOSC-F2-01M_cc Rich metadata for scholarly publications -- ## Indicator: has_cc_license. Creative Commons licensing has become a -- de facto standard in scholarly communication and is promoted by many initiatives --- like Plan S. This indicator might be only useful when applied +-- like Plan S. This indicator might be only useful when applied -- to openly available publications. -- Same indicator as EOSC-TR1.1-02M (Najko's instructions) -- create table indi_pub_has_cc_licence_f stored as parquet as -- select -- distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_f --- from publication p --- left outer join (selectp.id,license.type as lic from publication p --- join publication_licenses as license on license.id = p.id --- where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +-- from publication p +-- left outer join (selectp.id,license.type as lic from publication p +-- join publication_licenses as license on license.id = p.id +-- where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp -- on p.id= tmp.id create table indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract from publication; -create table indi_result_with_orcid stored as parquet as +create table indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid -from result r -left outer join (select id, 1 as has_orcid from result_orcid) tmp -on r.id= tmp.id - -create table indi_funded_result_with_fundref stored as parquet as -select distinct r.id, coalesce(fundref, 0) as fundref -from project_results r -left outer join (select distinct id, 1 as fundref from project_results -where provenance='Harvested') tmp +from result r +left outer join (select id, 1 as has_orcid from result_orcid) tmp on r.id= tmp.id -create table indi_result_org_country_collab stored as parquet as -with tmp as +---- Sprint 3 ---- + +create table indi_funded_result_with_fundref stored as parquet as +select distinct r.id, coalesce(fundref, 0) as fundref +from project_results r +left outer join (select distinct id, 1 as fundref from project_results +where provenance='Harvested') tmp +on r.id= tmp.id + +create table indi_result_org_country_collab stored as parquet as +with tmp as (select o.id as id, o.country , ro.id as result,r.type from organization o join result_organization ro on o.id=ro.organization join result r on r.id=ro.id where o.country <> 'UNKNOWN') select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations from tmp as o1 join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id and o1.country<>o2.country +where o1.id<>o2.id and o1.country<>o2.country group by o1.id, o1.type,o2.country -create table indi_result_org_collab stored as parquet as -with tmp as +create table indi_result_org_collab stored as parquet as +with tmp as (select o.id, ro.id as result,r.type from organization o join result_organization ro on o.id=ro.organization join result r on r.id=ro.id) @@ -299,19 +303,32 @@ join tmp as o2 on o1.result=o2.result where o1.id<>o2.id group by o1.id, o2.id, o1.type -create table indi_funder_country_collab stored as parquet as -with tmp as (select funder, project, country from organization_projects op -join organization o on o.id=op.id -join project p on p.id=op.project +create table indi_funder_country_collab stored as parquet as +with tmp as (select funder, project, country from organization_projects op +join organization o on o.id=op.id +join project p on p.id=op.project where country <> 'UNKNOWN') select f1.funder, f1.country, f2.country, count(distinct f1.project) as collaborations from tmp as f1 join tmp as f2 on f1.project=f2.project -where f1.country<>f2.country +where f1.country<>f2.country group by f1.funder, f2.country, f1.country +create table indi_result_country_collab stored as parquet as +with tmp as +(select country, ro.id as result,r.type from organization o +join result_organization ro on o.id=ro.organization +join result r on r.id=ro.id) +select o1.country country1, o2.country country2, o1.type, count(distinct o1.result) as collaborations +from tmp as o1 +join tmp as o2 on o1.result=o2.result +where o1.country<>o2.country +group by o1.country, o2.country, o1.type + +---- Sprint 4 ---- + create table indi_pub_diamond stored as parquet as -select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal +select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal from publication_datasources pd left outer join ( select pd.id, 1 as in_diamond_journal from publication_datasources pd @@ -321,7 +338,7 @@ and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) on pd.id=tmp.id create table indi_pub_hybrid stored as parquet as -select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid +select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid from publication_datasources pd left outer join ( select pd.id, 1 as is_hybrid from publication_datasources pd @@ -331,7 +348,7 @@ and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp on pd.id=tmp.id create table indi_is_gold_oa stored as parquet as -(select distinct pd.id, coalesce(gold_oa, 0) as gold_oa +(select distinct pd.id, coalesce(gold_oa, 0) as gold_oa from publication_datasources pd left outer join ( select pd.id, 1 as gold_oa from publication_datasources pd @@ -342,7 +359,7 @@ on pd.id=tmp.id) create table indi_pub_in_transformative stored as parquet as -select distinct pd.id, coalesce(is_transformative, 0) as is_transformative +select distinct pd.id, coalesce(is_transformative, 0) as is_transformative from publication pd left outer join ( select pd.id, 1 as is_transformative from publication_datasources pd @@ -353,10 +370,15 @@ on pd.id=tmp.id create table indi_pub_closed_other_open stored as parquet as select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri -left outer join +left outer join (select ri.id, 1 as pub_closed_other_open from result_instance ri join publication p on p.id=ri.id join datasource d on ri.hostedby=d.id where d.type like '%Journal%' and ri.accessright='Closed Access' and -(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp -on tmp.id=ri.id \ No newline at end of file +(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp +on tmp.id=ri.id + +---- Sprint 5 ---- + +create table indi_result_no_of_copies stored as parquet as +select id, count(id) as number_of_copies from result_instance group by id \ No newline at end of file From 58996972d9530b1389ee2243e70f2b1bb47db69c Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Tue, 21 Dec 2021 03:35:04 +0200 Subject: [PATCH 096/176] added first indicator of sprint 5 --- .../oozie_app/scripts/step20-createMonitorDB.sql | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index fa8e4c6a7..1ae459640 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -115,10 +115,10 @@ create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.i compute stats TARGET.indi_pub_gold_oa; create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_has_abstract; -create table TARGET.indi_pub_has_cc_licence stored as parquet as select * from SOURCE.indi_pub_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_has_cc_licence; -create table TARGET.indi_pub_has_cc_licence_url stored as parquet as select * from SOURCE.indi_pub_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_has_cc_licence_url; +create table TARGET.indi_result_has_cc_licence_f stored as parquet as select * from SOURCE.indi_result_has_cc_licence_f orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_result_has_cc_licence_f; +create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_result_has_cc_licence_url; create view TARGET.indi_funder_country_collab stored as select * from SOURCE.indi_funder_country_collab; @@ -135,6 +135,9 @@ compute stats TARGET.indi_pub_in_transformative; create table TARGET.indi_pub_closed_other_open stored as parquet as select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_closed_other_open; +create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_result_no_of_copies; + --denorm alter table TARGET.result rename to TARGET.res_tmp; From 3920d68992b1d7f8a67afec5cf8dd3b03037cd29 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 21 Dec 2021 11:41:49 +0100 Subject: [PATCH 097/176] Fixed workflow generation of delta in datacite --- .../dhp/sx/graph/scholix/ScholixUtils.scala | 22 ++- .../datacite/oozie_app/config-default.xml | 19 +++ .../sx/graph/datacite/oozie_app/workflow.xml | 62 +++++++ .../graph/retrieve_datacite_delta_params.json | 41 +++++ .../sx/graph/SparkRetrieveDataciteDelta.scala | 154 +++++++++-------- .../PrepareInfo.java | 21 +-- .../SparkResultToOrganizationFromSemRel.java | 5 - .../StepActions.java | 6 +- .../PrepareInfoJobTest.java | 43 ++--- .../sx/graph/convert_object_json_params.json | 9 +- .../graph/finalGraph/oozie_app/workflow.xml | 62 ------- .../eu/dnetlib/dhp/sx/graph/relations.json | 158 ------------------ .../oozie_app/config-default.xml | 10 ++ .../serializeGraph/oozie_app/workflow.xml | 83 +++++++++ .../sx/graph/SparkConvertObjectToJson.scala | 6 +- 15 files changed, 364 insertions(+), 337 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/datacite/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/datacite/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/retrieve_datacite_delta_params.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/relations.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/serializeGraph/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/serializeGraph/oozie_app/workflow.xml diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala index f8010bbd5..f35af0905 100644 --- a/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala @@ -12,7 +12,7 @@ import org.json4s.jackson.JsonMethods.parse import scala.collection.JavaConverters._ import scala.io.Source -object ScholixUtils { +object ScholixUtils extends Serializable { val DNET_IDENTIFIER_SCHEMA: String = "DNET Identifier" @@ -24,7 +24,7 @@ object ScholixUtils { case class RelatedEntities(id: String, relatedDataset: Long, relatedPublication: Long) {} val relations: Map[String, RelationVocabulary] = { - val input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relations.json")).mkString + val input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/scholexplorer/relation/relations.json")).mkString implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) @@ -62,6 +62,11 @@ object ScholixUtils { } + def generateScholixResourceFromResult(r:Result) :ScholixResource = { + generateScholixResourceFromSummary(ScholixUtils.resultToSummary(r)) + } + + val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] = new Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] with Serializable { override def zero: RelatedEntities = null @@ -184,6 +189,19 @@ object ScholixUtils { } + def generateCompleteScholix(scholix: Scholix, target: ScholixResource): Scholix = { + val s = new Scholix + s.setPublicationDate(scholix.getPublicationDate) + s.setPublisher(scholix.getPublisher) + s.setLinkprovider(scholix.getLinkprovider) + s.setRelationship(scholix.getRelationship) + s.setSource(scholix.getSource) + s.setTarget(target) + s.setIdentifier(DHPUtils.md5(s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}")) + s + } + + def generateScholixResourceFromSummary(summaryObject: ScholixSummary): ScholixResource = { val r = new ScholixResource r.setIdentifier(summaryObject.getLocalIdentifier) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/datacite/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/datacite/oozie_app/config-default.xml new file mode 100644 index 000000000..bdd48b0ab --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/datacite/oozie_app/config-default.xml @@ -0,0 +1,19 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/datacite/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/datacite/oozie_app/workflow.xml new file mode 100644 index 000000000..751b124cf --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/datacite/oozie_app/workflow.xml @@ -0,0 +1,62 @@ + + + + sourcePath + the source path of scholix graph + + + datacitePath + the datacite native path + + + workingSupportPath + the working Support path + + + isLookupUrl + The IS lookUp service endopoint + + + updateDS + false + The transformation Rule to apply + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + New Update from Datacite to Scholix + eu.dnetlib.dhp.sx.graph.SparkRetrieveDataciteDelta + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=6000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --datacitePath${datacitePath} + --masteryarn + --workingSupportPath${workingSupportPath} + --isLookupUrl${isLookupUrl} + --updateDS${updateDS} + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/retrieve_datacite_delta_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/retrieve_datacite_delta_params.json new file mode 100644 index 000000000..78777ffff --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/retrieve_datacite_delta_params.json @@ -0,0 +1,41 @@ +[ + { + "paramName": "s", + "paramLongName": "sourcePath", + "paramDescription": "the source mdstore path", + "paramRequired": true + }, + + { + "paramName": "d", + "paramLongName": "datacitePath", + "paramDescription": "the datacite native path", + "paramRequired": true + }, + + { + "paramName": "w", + "paramLongName": "workingSupportPath", + "paramDescription": "the working Support path", + "paramRequired": true + }, + { + "paramName": "i", + "paramLongName": "isLookupUrl", + "paramDescription": "the isLookup URL", + "paramRequired": true + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "the master name", + "paramRequired": true + }, + { + "paramName": "u", + "paramLongName": "updateDS", + "paramDescription": "Need to regenerate all support Dataset", + "paramRequired": false + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala index 7f37829ee..45a6cfc89 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala @@ -9,9 +9,10 @@ import eu.dnetlib.dhp.schema.sx.scholix.{Scholix, ScholixResource} import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils import eu.dnetlib.dhp.utils.{DHPUtils, ISLookupClientFactory} +import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.sql.functions.max import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} -import org.slf4j.Logger +import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ import java.text.SimpleDateFormat @@ -58,14 +59,15 @@ class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:L def retrieveLastCollectedFrom(spark:SparkSession, entitiesPath:String):Long = { log.info("Retrieve last entities collected From") - implicit val oafEncoder:Encoder[Result] = Encoders.kryo[Result] + implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result] import spark.implicits._ - val entitiesDS = spark.read.load(s"$entitiesPath/*").as[Result] + val entitiesDS = spark.read.load(s"$entitiesPath/*").as[Oaf].filter(o =>o.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]) val date = entitiesDS.filter(r => r.getDateofcollection!= null).map(_.getDateofcollection).select(max("value")).first.getString(0) - ISO8601toEpochMillis(date) + ISO8601toEpochMillis(date) / 1000 } @@ -73,7 +75,7 @@ class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:L * The method of update Datacite relationships on Scholexplorer * needs some utilities data structures * One is the scholixResource DS that stores all the nodes in the Scholix Graph - * in format (dnetID, ScholixResource ) + * in format ScholixResource * @param summaryPath the path of the summary in Scholix * @param workingPath the working path * @param spark the spark session @@ -84,11 +86,37 @@ class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:L log.info("Convert All summary to ScholixResource") spark.read.load(summaryPath).as[ScholixSummary] - .map(ScholixUtils.generateScholixResourceFromSummary) + .map(ScholixUtils.generateScholixResourceFromSummary)(scholixResourceEncoder) .filter(r => r.getIdentifier!= null && r.getIdentifier.size>0) - .map(r=> (r.getIdentifier,r))(Encoders.tuple(Encoders.STRING, scholixResourceEncoder)) - .write.mode(SaveMode.Overwrite).save(scholixResourcePath(workingPath)) + .write.mode(SaveMode.Overwrite).save(s"${scholixResourcePath(workingPath)}_native") + } + /** + * This method convert the new Datacite Resource into Scholix Resource + * Needed to fill the source and the type of Scholix Relationships + * @param workingPath the Working Path + * @param spark The spark Session + */ + def addMissingScholixResource(workingPath:String, spark:SparkSession ) :Unit = { + implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val scholixResourceEncoder:Encoder[ScholixResource] = Encoders.kryo[ScholixResource] + implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result] + import spark.implicits._ + + spark.read.load(dataciteOAFPath(workingPath)).as[Oaf] + .filter(_.isInstanceOf[Result]) + .map(_.asInstanceOf[Result]) + .map(ScholixUtils.generateScholixResourceFromResult) + .filter(r => r.getIdentifier!= null && r.getIdentifier.size>0) + .write.mode(SaveMode.Overwrite).save(s"${scholixResourcePath(workingPath)}_update") + + val update = spark.read.load(s"${scholixResourcePath(workingPath)}_update").as[ScholixResource] + val native = spark.read.load(s"${scholixResourcePath(workingPath)}_native").as[ScholixResource] + val graph = update.union(native) + .groupByKey(_.getDnetIdentifier) + .reduceGroups((a,b) => if (a!= null && a.getDnetIdentifier!= null) a else b) + .map(_._2) + graph.write.mode(SaveMode.Overwrite).save(s"${scholixResourcePath(workingPath)}_graph") } @@ -102,29 +130,20 @@ class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:L * @param vocabularies Vocabularies needed for transformation */ - def getDataciteUpdate(datacitePath:String, timestamp:Long, workingPath:String, spark:SparkSession,vocabularies: VocabularyGroup): Unit = { + def getDataciteUpdate(datacitePath:String, timestamp:Long, workingPath:String, spark:SparkSession,vocabularies: VocabularyGroup): Long = { import spark.implicits._ val ds = spark.read.load(datacitePath).as[DataciteType] implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] - ds.filter(_.timestamp>=timestamp) - .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks = true)) - .flatMap(i => fixRelations(i)).filter(i => i != null) - .write.mode(SaveMode.Overwrite).save(dataciteOAFPath(workingPath)) - + val total = ds.filter(_.timestamp>=timestamp).count() + if (total >0) { + ds.filter(_.timestamp >= timestamp) + .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks = true)) + .flatMap(i => fixRelations(i)).filter(i => i != null) + .write.mode(SaveMode.Overwrite).save(dataciteOAFPath(workingPath)) + } + total } - - /** - * This method convert an Instance of OAF Result into - * Scholix Resource - * @param r The input Result - * @return The Scholix Resource - */ - def resultToScholixResource(r:Result):ScholixResource = { - ScholixUtils.generateScholixResourceFromSummary(ScholixUtils.resultToSummary(r)) - } - - /** * After added the new ScholixResource, we need to update the scholix Pid Map * to intersected with the new Datacite Relations @@ -135,11 +154,11 @@ class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:L def generatePidMap(workingPath:String, spark:SparkSession ) :Unit = { implicit val scholixResourceEncoder:Encoder[ScholixResource] = Encoders.kryo[ScholixResource] import spark.implicits._ - spark.read.load(scholixResourcePath(workingPath)).as[(String,ScholixResource)] + spark.read.load(s"${scholixResourcePath(workingPath)}_graph").as[ScholixResource] .flatMap(r=> - r._2.getIdentifier.asScala + r.getIdentifier.asScala .map(i =>DHPUtils.generateUnresolvedIdentifier(i.getIdentifier, i.getSchema)) - .map((_, r._1)) + .map(t =>(t, r.getDnetIdentifier)) )(Encoders.tuple(Encoders.STRING, Encoders.STRING)) .groupByKey(_._1) .reduceGroups((a,b) => if (a!= null && a._2!= null) a else b) @@ -147,27 +166,6 @@ class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:L .write.mode(SaveMode.Overwrite).save(pidMapPath(workingPath)) } - /** - * This method convert the new Datacite Resource into Scholix Resource - * Needed to fill the source and the type of Scholix Relationships - * @param workingPath the Working Path - * @param spark The spark Session - */ - def addMissingScholixResource(workingPath:String, spark:SparkSession ) :Unit = { - implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val scholixResourceEncoder:Encoder[ScholixResource] = Encoders.kryo[ScholixResource] - implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result] - - spark.read.load(dataciteOAFPath(workingPath)).as[Oaf] - .filter(_.isInstanceOf[Result]) - .map(_.asInstanceOf[Result]) - .map(resultToScholixResource) - .filter(r => r.getIdentifier!= null && r.getIdentifier.size>0) - .map(r=> (r.getIdentifier,r))(Encoders.tuple(Encoders.STRING, scholixResourceEncoder)) - .write.mode(SaveMode.Append).save(scholixResourcePath(workingPath)) - } - - /** * This method resolve the datacite relation and filter the resolved * relation @@ -222,25 +220,26 @@ class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:L implicit val scholixEncoder:Encoder[Scholix] = Encoders.kryo[Scholix] implicit val scholixResourceEncoder:Encoder[ScholixResource] = Encoders.kryo[ScholixResource] implicit val relationEncoder:Encoder[Relation] = Encoders.kryo[Relation] - import spark.implicits._ + implicit val intermediateEncoder :Encoder[(String,Scholix)] = Encoders.tuple(Encoders.STRING, scholixEncoder) - val relationss:Dataset[(String, Relation)] = spark.read.load(s"$workingPath/ResolvedRelation").as[Relation].map(r =>(r.getSource,r))(Encoders.tuple(Encoders.STRING, relationEncoder)) - val id_summary:Dataset[(String,ScholixResource)] = spark.read.load(scholixResourcePath(workingPath)).as[(String,ScholixResource)] + val relations:Dataset[(String, Relation)] = spark.read.load(resolvedRelationPath(workingPath)).as[Relation].map(r =>(r.getSource,r))(Encoders.tuple(Encoders.STRING, relationEncoder)) - relationss.joinWith(id_summary, relationss("_1").equalTo(id_summary("_1")),"inner") - .map(t => (t._1._2.getTarget,ScholixUtils.scholixFromSource(t._1._2, t._2._2)))(Encoders.tuple(Encoders.STRING, scholixEncoder)) - .write.mode(SaveMode.Overwrite) - .save(s"$workingPath/scholix_one_verse") + val id_summary:Dataset[(String,ScholixResource)] = spark.read.load(s"${scholixResourcePath(workingPath)}_graph").as[ScholixResource].map(r => (r.getDnetIdentifier,r))(Encoders.tuple(Encoders.STRING, scholixResourceEncoder)) - val source_scholix:Dataset[(String,Scholix)] = spark.read.load(s"$workingPath/scholix_one_verse").as[(String,Scholix)] + id_summary.cache() + + relations.joinWith(id_summary, relations("_1").equalTo(id_summary("_1")),"inner") + .map(t => (t._1._2.getTarget,ScholixUtils.scholixFromSource(t._1._2, t._2._2))) + .write.mode(SaveMode.Overwrite).save(s"$workingPath/scholix_one_verse") + + val source_scholix:Dataset[(String, Scholix)] =spark.read.load(s"$workingPath/scholix_one_verse").as[(String,Scholix)] source_scholix.joinWith(id_summary, source_scholix("_1").equalTo(id_summary("_1")),"inner") .map(t => { - val target = t._2._2 - val scholix = t._1._2 - scholix.setTarget(target) - scholix + val target:ScholixResource =t._2._2 + val scholix:Scholix = t._1._2 + ScholixUtils.generateCompleteScholix(scholix,target) })(scholixEncoder).write.mode(SaveMode.Overwrite).save(s"$workingPath/scholix") } @@ -259,7 +258,7 @@ class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:L val datacitePath = parser.get("datacitePath") log.info(s"DatacitePath is '$datacitePath'") - val workingPath = parser.get("workingPath") + val workingPath = parser.get("workingSupportPath") log.info(s"workingPath is '$workingPath'") val isLookupUrl: String = parser.get("isLookupUrl") @@ -279,13 +278,28 @@ class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:L log.info("Retrieve last entities collected From starting from scholix Graph") lastCollectionDate = retrieveLastCollectedFrom(spark, s"$sourcePath/entities") } + else { + val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) + fs.delete(new Path(s"${scholixResourcePath(workingPath)}_native"), true) + fs.rename(new Path(s"${scholixResourcePath(workingPath)}_graph"), new Path(s"${scholixResourcePath(workingPath)}_native")) + lastCollectionDate = retrieveLastCollectedFrom(spark, dataciteOAFPath(workingPath)) + } - getDataciteUpdate(datacitePath, lastCollectionDate, workingPath, spark, vocabularies) - addMissingScholixResource(workingPath,spark) - generatePidMap(workingPath, spark) - resolveUpdateRelation(workingPath,spark) - generateScholixUpdate(workingPath, spark) - - + val numRecords = getDataciteUpdate(datacitePath, lastCollectionDate, workingPath, spark, vocabularies) + if (numRecords>0) { + addMissingScholixResource(workingPath,spark) + generatePidMap(workingPath, spark) + resolveUpdateRelation(workingPath,spark) + generateScholixUpdate(workingPath, spark) + } + } +} + + +object SparkRetrieveDataciteDelta { + val log: Logger = LoggerFactory.getLogger(SparkRetrieveDataciteDelta.getClass) + + def main(args: Array[String]): Unit = { + new SparkRetrieveDataciteDelta("/eu/dnetlib/dhp/sx/graph/retrieve_datacite_delta_params.json", args, log).initialize().run() } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java index 707462f24..23909fd9a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java @@ -33,13 +33,14 @@ public class PrepareInfo implements Serializable { private static final Logger log = LoggerFactory.getLogger(PrepareInfo.class); // associate orgs with all their parent - private static final String ORGANIZATION_ORGANIZATION_QUERY = "SELECT target key, collect_set(source) as valueSet " + + private static final String ORGANIZATION_ORGANIZATION_QUERY = "SELECT target key, collect_set(source) as valueSet " + + "FROM relation " + "WHERE lower(relclass) = '" + ModelConstants.IS_PARENT_OF.toLowerCase() + "' and datainfo.deletedbyinference = false " + "GROUP BY target"; - //associates results with all the orgs they are affiliated to + // associates results with all the orgs they are affiliated to private static final String RESULT_ORGANIZATION_QUERY = "SELECT source key, collect_set(target) as valueSet " + "FROM relation " + "WHERE lower(relclass) = '" + ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase() + @@ -88,7 +89,7 @@ public class PrepareInfo implements Serializable { childParentPath, leavesPath, resultOrganizationPath, - relationPath)); + relationPath)); } private static void prepareInfo(SparkSession spark, String inputPath, String childParentOrganizationPath, @@ -113,13 +114,13 @@ public class PrepareInfo implements Serializable { .json(resultOrganizationPath); relation - .filter( - (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && - r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION)) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(relationPath); + .filter( + (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && + r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(relationPath); Dataset children = spark .sql( diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java index e26276f53..9ceebf222 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.resulttoorganizationfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; - import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.io.Serializable; @@ -22,13 +21,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.KeyValueSet; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; - public class SparkResultToOrganizationFromSemRel implements Serializable { private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromSemRel.class); private static final int MAX_ITERATION = 5; @@ -201,6 +198,4 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { .json(outputPath); } - - } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java index 02444cb15..1adbbe60e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java @@ -75,8 +75,8 @@ public class StepActions implements Serializable { ret.setValueSet(orgs); return ret; }, Encoders.bean(KeyValueSet.class)) - .write() - .mode(SaveMode.Overwrite) + .write() + .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); } @@ -179,7 +179,6 @@ public class StepActions implements Serializable { "GROUP BY resId") .as(Encoders.bean(KeyValueSet.class)); - // create new relations from result to organization for each result linked to a leaf return resultParent .flatMap( @@ -200,7 +199,6 @@ public class StepActions implements Serializable { .iterator(), Encoders.bean(Relation.class)); - } } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java index 21d99321b..2d2668db3 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java @@ -84,7 +84,7 @@ public class PrepareInfoJobTest { "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", - "-relationPath", workingDir.toString() + "/relation" + "-relationPath", workingDir.toString() + "/relation" }); @@ -229,7 +229,7 @@ public class PrepareInfoJobTest { "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", - "-relationPath", workingDir.toString() + "/relation" + "-relationPath", workingDir.toString() + "/relation" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -335,34 +335,35 @@ public class PrepareInfoJobTest { } @Test - public void relationTest()throws Exception { + public void relationTest() throws Exception { PrepareInfo - .main( - new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-graphPath", getClass() - .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") - .getPath(), - "-hive_metastore_uris", "", - "-leavesPath", workingDir.toString() + "/currentIteration/", - "-resultOrgPath", workingDir.toString() + "/resultOrganization/", - "-childParentPath", workingDir.toString() + "/childParentOrg/", - "-relationPath", workingDir.toString() + "/relation" + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" - }); + }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); Assertions.assertEquals(7, verificationDs.count()); } + @Test public void resultOrganizationTest1() throws Exception { @@ -378,7 +379,7 @@ public class PrepareInfoJobTest { "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", - "-relationPath", workingDir.toString() + "/relation" + "-relationPath", workingDir.toString() + "/relation" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -512,7 +513,7 @@ public class PrepareInfoJobTest { "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", - "-relationPath", workingDir.toString() + "/relation" + "-relationPath", workingDir.toString() + "/relation" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -539,7 +540,7 @@ public class PrepareInfoJobTest { "-leavesPath", workingDir.toString() + "/currentIteration/", "-resultOrgPath", workingDir.toString() + "/resultOrganization/", "-childParentPath", workingDir.toString() + "/childParentOrg/", - "-relationPath", workingDir.toString() + "/relation" + "-relationPath", workingDir.toString() + "/relation" }); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/convert_object_json_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/convert_object_json_params.json index 4b15da623..890570a0b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/convert_object_json_params.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/convert_object_json_params.json @@ -1,6 +1,7 @@ [ - {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, - {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true}, - {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the raw graph", "paramRequired": true}, - {"paramName":"o", "paramLongName":"objectType", "paramDescription": "should be scholix or Summary", "paramRequired": true} + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true}, + {"paramName":"su", "paramLongName":"scholixUpdatePath", "paramDescription": "the scholix updated Path", "paramRequired": false}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the raw graph", "paramRequired": true}, + {"paramName":"o", "paramLongName":"objectType", "paramDescription": "should be scholix or Summary", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml index 85c0d486d..e46e59cc0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml @@ -90,68 +90,6 @@ --relationPath${targetPath}/relation - - -
- - - - - - - - - - - - - yarn - cluster - Serialize scholix to JSON - eu.dnetlib.dhp.sx.graph.SparkConvertObjectToJson - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=6000 - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn - --sourcePath${targetPath}/provision/scholix/scholix - --targetPath${targetPath}/index/scholix_json - --objectTypescholix - - - - - - - - - yarn - cluster - Serialize summary to JSON - eu.dnetlib.dhp.sx.graph.SparkConvertObjectToJson - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=6000 - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn - --sourcePath${targetPath}/provision/summaries_filtered - --targetPath${targetPath}/index/summaries_json - --objectTypesummary - diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/relations.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/relations.json deleted file mode 100644 index 98e8daa18..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/relations.json +++ /dev/null @@ -1,158 +0,0 @@ -{ - "cites":{ - "original":"Cites", - "inverse":"IsCitedBy" - }, - "compiles":{ - "original":"Compiles", - "inverse":"IsCompiledBy" - }, - "continues":{ - "original":"Continues", - "inverse":"IsContinuedBy" - }, - "derives":{ - "original":"IsSourceOf", - "inverse":"IsDerivedFrom" - }, - "describes":{ - "original":"Describes", - "inverse":"IsDescribedBy" - }, - "documents":{ - "original":"Documents", - "inverse":"IsDocumentedBy" - }, - "hasmetadata":{ - "original":"HasMetadata", - "inverse":"IsMetadataOf" - }, - "hasassociationwith":{ - "original":"HasAssociationWith", - "inverse":"HasAssociationWith" - }, - "haspart":{ - "original":"HasPart", - "inverse":"IsPartOf" - }, - "hasversion":{ - "original":"HasVersion", - "inverse":"IsVersionOf" - }, - "iscitedby":{ - "original":"IsCitedBy", - "inverse":"Cites" - }, - "iscompiledby":{ - "original":"IsCompiledBy", - "inverse":"Compiles" - }, - "iscontinuedby":{ - "original":"IsContinuedBy", - "inverse":"Continues" - }, - "isderivedfrom":{ - "original":"IsDerivedFrom", - "inverse":"IsSourceOf" - }, - "isdescribedby":{ - "original":"IsDescribedBy", - "inverse":"Describes" - }, - "isdocumentedby":{ - "original":"IsDocumentedBy", - "inverse":"Documents" - }, - "isidenticalto":{ - "original":"IsIdenticalTo", - "inverse":"IsIdenticalTo" - }, - "ismetadatafor":{ - "original":"IsMetadataFor", - "inverse":"IsMetadataOf" - }, - "ismetadataof":{ - "original":"IsMetadataOf", - "inverse":"IsMetadataFor" - }, - "isnewversionof":{ - "original":"IsNewVersionOf", - "inverse":"IsPreviousVersionOf" - }, - "isobsoletedby":{ - "original":"IsObsoletedBy", - "inverse":"Obsoletes" - }, - "isoriginalformof":{ - "original":"IsOriginalFormOf", - "inverse":"IsVariantFormOf" - }, - "ispartof":{ - "original":"IsPartOf", - "inverse":"HasPart" - }, - "ispreviousversionof":{ - "original":"IsPreviousVersionOf", - "inverse":"IsNewVersionOf" - }, - "isreferencedby":{ - "original":"IsReferencedBy", - "inverse":"References" - }, - "isrelatedto":{ - "original":"IsRelatedTo", - "inverse":"IsRelatedTo" - }, - "isrequiredby":{ - "original":"IsRequiredBy", - "inverse":"Requires" - }, - "isreviewedby":{ - "original":"IsReviewedBy", - "inverse":"Reviews" - }, - "issourceof":{ - "original":"IsSourceOf", - "inverse":"IsDerivedFrom" - }, - "issupplementedby":{ - "original":"IsSupplementedBy", - "inverse":"IsSupplementTo" - }, - "issupplementto":{ - "original":"IsSupplementTo", - "inverse":"IsSupplementedBy" - }, - "isvariantformof":{ - "original":"IsVariantFormOf", - "inverse":"IsOriginalFormOf" - }, - "isversionof":{ - "original":"IsVersionOf", - "inverse":"HasVersion" - }, - "obsoletes":{ - "original":"Obsoletes", - "inverse":"IsObsoletedBy" - }, - "references":{ - "original":"References", - "inverse":"IsReferencedBy" - }, - "requires":{ - "original":"Requires", - "inverse":"IsRequiredBy" - }, - "related":{ - "original":"IsRelatedTo", - "inverse":"IsRelatedTo" - }, - "reviews":{ - "original":"Reviews", - "inverse":"IsReviewedBy" - }, - "unknown":{ - "original":"Unknown", - "inverse":"Unknown" - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/serializeGraph/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/serializeGraph/oozie_app/config-default.xml new file mode 100644 index 000000000..6fb2a1253 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/serializeGraph/oozie_app/config-default.xml @@ -0,0 +1,10 @@ + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/serializeGraph/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/serializeGraph/oozie_app/workflow.xml new file mode 100644 index 000000000..2844d7baa --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/serializeGraph/oozie_app/workflow.xml @@ -0,0 +1,83 @@ + + + + scholixUpdatePath + the working dir base path of the scholix updated + + + targetPath + the final graph path + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + yarn + cluster + Serialize scholix to JSON + eu.dnetlib.dhp.sx.graph.SparkConvertObjectToJson + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=6000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn + --sourcePath${targetPath}/provision/scholix/scholix + --targetPath${targetPath}/index/scholix_json + --scholixUpdatePath${scholixUpdatePath} + --objectTypescholix + + + + + + + + + yarn + cluster + Serialize summary to JSON + eu.dnetlib.dhp.sx.graph.SparkConvertObjectToJson + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=6000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn + --sourcePath${targetPath}/provision/summaries_filtered + --targetPath${targetPath}/index/summaries_json + --objectTypesummary + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala index cc1b97fd6..0c54de7c8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala @@ -30,6 +30,9 @@ object SparkConvertObjectToJson { log.info(s"targetPath -> $targetPath") val objectType = parser.get("objectType") log.info(s"objectType -> $objectType") + val scholixUpdatePath = parser.get("scholixUpdatePath") + log.info(s"scholixUpdatePath -> $scholixUpdatePath") + implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] @@ -42,7 +45,8 @@ object SparkConvertObjectToJson { case "scholix" => log.info("Serialize Scholix") val d: Dataset[Scholix] = spark.read.load(sourcePath).as[Scholix] - d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(6000).saveAsTextFile(targetPath, classOf[GzipCodec]) + val u :Dataset[Scholix]= spark.read.load(s"$scholixUpdatePath/scholix").as[Scholix] + d.union(u).repartition(8000).map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.saveAsTextFile(targetPath, classOf[GzipCodec]) case "summary" => log.info("Serialize Summary") val d: Dataset[ScholixSummary] = spark.read.load(sourcePath).as[ScholixSummary] From 460e6b95d63b04c4977be1302eca2f58e7259668 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 21 Dec 2021 13:48:03 +0100 Subject: [PATCH 098/176] [Graph Dump] - --- .../graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml index f84ab7e1a..8eab56992 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml @@ -325,6 +325,7 @@ --sourcePath${workingDir}/validrelation --outputPath${workingDir}/relation/relation + --removeSet${removeSet} From a4c0cbb98c35f833b36183d075ba64845ba98ddc Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Tue, 21 Dec 2021 15:54:38 +0200 Subject: [PATCH 099/176] fixed typos in indicators. Added extra views in monitor --- .../scripts/step16-createIndicatorsTables.sql | 234 ++---------------- .../scripts/step20-createMonitorDB.sql | 12 +- 2 files changed, 23 insertions(+), 223 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 9c67da883..bdd681f8d 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -42,181 +42,8 @@ join datasource on datasource.id = ri.hostedby where datasource.id like '%doajarticles%') tmp on p.id= tmp.id; ---create table indi_project_pubs_count stored as parquet as ---select pr.id id, count(p.id) total_pubs from project_results pr ---join publication p on p.id=pr.result ---group by pr.id; - ---create table indi_project_datasets_count stored as parquet as ---select pr.id id, count(d.id) total_datasets from project_results pr ---join dataset d on d.id=pr.result ---group by pr.id; - ---create table indi_project_software_count stored as parquet as ---select pr.id id, count(s.id) total_software from project_results pr ---join software s on s.id=pr.result ---group by pr.id; - ---create table indi_project_otherresearch_count stored as parquet as ---select pr.id id, count(o.id) total_other from project_results pr ---join otherresearchproduct o on o.id=pr.result ---group by pr.id; - ---create table indi_pub_avg_year_country_oa stored as parquet as ---select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, ---round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - --from - --(SELECT year, country, SUM(CASE - --WHEN bestlicence='Open Access' THEN 1 - --ELSE 0 - --END) AS OpenAccess, SUM(CASE - --WHEN bestlicence<>'Open Access' THEN 1 - --ELSE 0 - --END) AS NonOpenAccess - --FROM publication p - --join result_organization ro on p.id=ro.id - --join organization o on o.id=ro.organization - --where cast(year as int)>=2003 and cast(year as int)<=2021 - --group by year, country) tmp; - ---create table indi_dataset_avg_year_country_oa stored as parquet as ---select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, ---round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA ---from - --(SELECT year, country, SUM(CASE - --WHEN bestlicence='Open Access' THEN 1 - --ELSE 0 - --END) AS OpenAccess, SUM(CASE - --WHEN bestlicence<>'Open Access' THEN 1 - --ELSE 0 - --END) AS NonOpenAccess - --FROM dataset d - --join result_organization ro on d.id=ro.id - --join organization o on o.id=ro.organization - --where cast(year as int)>=2003 and cast(year as int)<=2021 - --group by year, country) tmp; - ---create table indi_software_avg_year_country_oa stored as parquet as ---select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, ---round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA ---from --- (SELECT year, country, SUM(CASE - --WHEN bestlicence='Open Access' THEN 1 --- ELSE 0 ---END) AS OpenAccess, SUM(CASE --- WHEN bestlicence<>'Open Access' THEN 1 --- ELSE 0 --- END) AS NonOpenAccess --- FROM software s --- join result_organization ro on s.id=ro.id --- join organization o on o.id=ro.organization --- where cast(year as int)>=2003 and cast(year as int)<=2021 --- group by year, country) tmp; - - ---create table indi_other_avg_year_country_oa stored as parquet as ---select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, ---round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA --- from --- (SELECT year, country, SUM(CASE --- WHEN bestlicence='Open Access' THEN 1 --- ELSE 0 --- END) AS OpenAccess, SUM(CASE --- WHEN bestlicence<>'Open Access' THEN 1 --- ELSE 0 --- END) AS NonOpenAccess --- FROM otherresearchproduct orp --- join result_organization ro on orp.id=ro.id --- join organization o on o.id=ro.organization --- where cast(year as int)>=2003 and cast(year as int)<=2021 --- group by year, country) tmp; - ---create table indi_pub_avg_year_context_oa stored as parquet as ---with total as ---(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc ---join context c on pc.concept like concat('%',c.id,'%') ---join publication p on p.id=pc.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by c.name, year ) ---select year, name, round(no_of_pubs/total*100,3) averageofpubs ---from total; - ---create table indi_dataset_avg_year_context_oa stored as parquet as ---with total as ---(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc ---join context c on pc.concept like concat('%',c.id,'%') ---join dataset p on p.id=pc.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by c.name, year ) ---select year, name, round(no_of_pubs/total*100,3) averageofdataset ---from total; - ---create table indi_software_avg_year_context_oa stored as parquet as ---with total as ---(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc ---join context c on pc.concept like concat('%',c.id,'%') ---join software p on p.id=pc.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by c.name, year ) ---select year, name, round(no_of_pubs/total*100,3) averageofsoftware ---from total; - ---create table indi_other_avg_year_context_oa stored as parquet as ---with total as ---(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc ---join context c on pc.concept like concat('%',c.id,'%') ---join otherresearchproduct p on p.id=pc.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by c.name, year ) ---select year, name, round(no_of_pubs/total*100,3) averageofother ---from total; - ---create table indi_other_avg_year_content_oa stored as parquet as ---with total as ---(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total ---from otherresearchproduct_datasources pd ---join datasource d on datasource=d.id ---join otherresearchproduct p on p.id=pd.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by d.type, year) ---select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct ---from total; - ---create table indi_software_avg_year_content_oa stored as parquet as ---with total as ---(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total ---from software_datasources pd ---join datasource d on datasource=d.id ---join software p on p.id=pd.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by d.type, year) ---select year, type, round(no_of_pubs/total*100,3) averageOfSoftware ---from total; - ---create table indi_dataset_avg_year_content_oa stored as parquet as ---with total as ---(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total ---from dataset_datasources pd ---join datasource d on datasource=d.id ---join dataset p on p.id=pd.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by d.type, year) ---select year, type, round(no_of_pubs/total*100,3) averageOfDatasets ---from total; - ---create table indi_pub_avg_year_content_oa stored as parquet as ---with total as ---(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total ---from publication_datasources pd ---join datasource d on datasource=d.id ---join publication p on p.id=pd.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by d.type, year) ---select year, type, round(no_of_pubs/total*100,3) averageOfPubs ---from total; - ---- Sprint 2 ---- -create table indi_result_has_cc_licence_f stored as parquet as +create table indi_result_has_cc_licence stored as parquet as select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license from result r left outer join (select r.id, license.type as lic from result r @@ -233,35 +60,6 @@ join result_licenses as license on license.id = r.id WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp on r.id= tmp.id; --- EOSC-TR1.1-02M: --- ## Indicator: has_cc_license. Creative Commons licensing has become a --- de facto standard in scholarly communication and is promoted by many initiatives --- like Plan S. This indicator might be only useful when applied --- to openly available publications. ---create table indi_pub_has_cc_licence_tr stored as parquet as ---select distinct p.id, case when lic='' or lic is null then 0 else 1 end indi_result_org_collabas has_cc_license_tr ---from publication p ---left outer join (select p.id, license.type as lic from publication p ---join publication_licenses as license on license.id = p.id ---where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp ---on p.id= tmp.id - --- #EOSC-F2-01M_cc Rich metadata for scholarly publications --- ## Indicator: has_cc_license. Creative Commons licensing has become a --- de facto standard in scholarly communication and is promoted by many initiatives --- like Plan S. This indicator might be only useful when applied --- to openly available publications. - --- Same indicator as EOSC-TR1.1-02M (Najko's instructions) --- create table indi_pub_has_cc_licence_f stored as parquet as --- select --- distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_f --- from publication p --- left outer join (selectp.id,license.type as lic from publication p --- join publication_licenses as license on license.id = p.id --- where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp --- on p.id= tmp.id - create table indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract from publication; @@ -270,16 +68,16 @@ create table indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid from result r left outer join (select id, 1 as has_orcid from result_orcid) tmp -on r.id= tmp.id +on r.id= tmp.id; + ---- Sprint 3 ---- - create table indi_funded_result_with_fundref stored as parquet as select distinct r.id, coalesce(fundref, 0) as fundref from project_results r left outer join (select distinct id, 1 as fundref from project_results where provenance='Harvested') tmp -on r.id= tmp.id +on r.id= tmp.id; create table indi_result_org_country_collab stored as parquet as with tmp as @@ -290,7 +88,7 @@ select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as col from tmp as o1 join tmp as o2 on o1.result=o2.result where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country +group by o1.id, o1.type,o2.country; create table indi_result_org_collab stored as parquet as with tmp as @@ -301,7 +99,7 @@ select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaboratio from tmp as o1 join tmp as o2 on o1.result=o2.result where o1.id<>o2.id -group by o1.id, o2.id, o1.type +group by o1.id, o2.id, o1.type; create table indi_funder_country_collab stored as parquet as with tmp as (select funder, project, country from organization_projects op @@ -312,7 +110,7 @@ select f1.funder, f1.country, f2.country, count(distinct f1.project) as collabor from tmp as f1 join tmp as f2 on f1.project=f2.project where f1.country<>f2.country -group by f1.funder, f2.country, f1.country +group by f1.funder, f2.country, f1.country; create table indi_result_country_collab stored as parquet as with tmp as @@ -323,10 +121,9 @@ select o1.country country1, o2.country country2, o1.type, count(distinct o1.resu from tmp as o1 join tmp as o2 on o1.result=o2.result where o1.country<>o2.country -group by o1.country, o2.country, o1.type +group by o1.country, o2.country, o1.type; ---- Sprint 4 ---- - create table indi_pub_diamond stored as parquet as select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal from publication_datasources pd @@ -335,7 +132,7 @@ select pd.id, 1 as in_diamond_journal from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp -on pd.id=tmp.id +on pd.id=tmp.id; create table indi_pub_hybrid stored as parquet as select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid @@ -345,7 +142,7 @@ select pd.id, 1 as is_hybrid from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp -on pd.id=tmp.id +on pd.id=tmp.id; create table indi_is_gold_oa stored as parquet as (select distinct pd.id, coalesce(gold_oa, 0) as gold_oa @@ -355,8 +152,7 @@ select pd.id, 1 as gold_oa from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps on (ps.issn_print=d.issn_printed or ps.issn_online=d.issn_online) where ps.journal_is_in_doaj is true or ps.journal_is_oa is true) tmp -on pd.id=tmp.id) - +on pd.id=tmp.id); create table indi_pub_in_transformative stored as parquet as select distinct pd.id, coalesce(is_transformative, 0) as is_transformative @@ -366,7 +162,7 @@ select pd.id, 1 as is_transformative from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and ps.is_transformative_journal=true) tmp -on pd.id=tmp.id +on pd.id=tmp.id; create table indi_pub_closed_other_open stored as parquet as select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri @@ -376,9 +172,9 @@ join publication p on p.id=ri.id join datasource d on ri.hostedby=d.id where d.type like '%Journal%' and ri.accessright='Closed Access' and (p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp -on tmp.id=ri.id +on tmp.id=ri.id; + ---- Sprint 5 ---- - create table indi_result_no_of_copies stored as parquet as -select id, count(id) as number_of_copies from result_instance group by id \ No newline at end of file +select id, count(id) as number_of_copies from result_instance group by id; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 1ae459640..9bd90aa1f 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -49,8 +49,11 @@ compute stats TARGET.result_greenoa; create table TARGET.result_languages stored as parquet as select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_languages; -create table TARGET.result_licences stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.result_licences; +create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_licenses; + +create table TARGET.licenses_normalized stored as parquet as select * from SOURCE.licenses_normalized orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.licenses_normalized; create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_oids; @@ -100,6 +103,7 @@ create view if not exists TARGET.project as select * from SOURCE.project; create view if not exists TARGET.project_oids as select * from SOURCE.project_oids; create view if not exists TARGET.project_organizations as select * from SOURCE.project_organizations; create view if not exists TARGET.project_resultcount as select * from SOURCE.project_resultcount; +create view if not exists TARGET.project_classification as select * from SOURCE.project_classification; create table TARGET.project_results stored as parquet as select id as result, project as id from TARGET.result_projects; compute stats TARGET.project_results; @@ -115,8 +119,8 @@ create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.i compute stats TARGET.indi_pub_gold_oa; create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_has_abstract; -create table TARGET.indi_result_has_cc_licence_f stored as parquet as select * from SOURCE.indi_result_has_cc_licence_f orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_result_has_cc_licence_f; +create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_result_has_cc_licence; create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_has_cc_licence_url; From 3edd661608808e54cbdd8dab614b0047ab159965 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Tue, 21 Dec 2021 22:55:04 +0200 Subject: [PATCH 100/176] fixed column names --- .../stats/oozie_app/scripts/step16-createIndicatorsTables.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index bdd681f8d..9f11fa49d 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -106,7 +106,7 @@ with tmp as (select funder, project, country from organization_projects op join organization o on o.id=op.id join project p on p.id=op.project where country <> 'UNKNOWN') -select f1.funder, f1.country, f2.country, count(distinct f1.project) as collaborations +select f1.funder, f1.country as country1, f2.country as country2, count(distinct f1.project) as collaborations from tmp as f1 join tmp as f2 on f1.project=f2.project where f1.country<>f2.country From 16539d7360b8a5701aeac5b478ebfe5c525733de Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Wed, 22 Dec 2021 02:54:42 +0200 Subject: [PATCH 101/176] added usage stats --- .../oa/graph/stats/oozie_app/scripts/step10.sql | 16 ++++++++++++++++ .../oozie_app/scripts/step20-createMonitorDB.sql | 3 +++ 2 files changed, 19 insertions(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql index fc0162a9c..13a4803a9 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql @@ -27,6 +27,22 @@ CREATE OR REPLACE VIEW ${stats_db_name}.licenses_normalized AS SELECT * FROM ${external_stats_db_name}.licenses_normalized; +------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------ +-- Usage statistics +------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------ +create or replace view ${stats_db_name}.usage_stats as +select * from openaire_prod_usage_stats.usage_stats; + +create or replace view ${stats_db_name}.downloads_stats as +select * from openaire_prod_usage_stats.downloads_stats; + +create or replace view ${stats_db_name}.pageviews_stats as +select * from openaire_prod_usage_stats.pageviews_stats; + +create or replace view ${stats_db_name}.views_stats as +select * from openaire_prod_usage_stats.views_stats; ------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------ diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 9bd90aa1f..d142f5d68 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -142,6 +142,9 @@ compute stats TARGET.indi_pub_closed_other_open; create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_no_of_copies; +--- Usage statistics +create table TARGET.usage_stats stored as parquet as select * from SOURCE.usage_stats orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); + --denorm alter table TARGET.result rename to TARGET.res_tmp; From 8d185000694d05f61cdbf521309357dc0a328695 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 22 Dec 2021 12:47:21 +0100 Subject: [PATCH 102/176] using dhp-schema:2.9.24 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b95bd9acb..ed7b8a2ca 100644 --- a/pom.xml +++ b/pom.xml @@ -758,7 +758,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.9.24-SNAPSHOT] + [2.9.24] [4.0.3] [6.0.5] [3.1.6] From c5739c4266d03e452af6dad33b4f362c4960536e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Dec 2021 15:08:33 +0100 Subject: [PATCH 103/176] [BipFinder] create action set for the measures at the level of the result --- .../bipfinder/SparkAtomicActionScoreJob.java | 6 +-- .../{bipfinder => common}/BipDeserialize.java | 4 +- .../{bipfinder => common}/BipScore.java | 4 +- .../{bipfinder => common}/Constants.java | 2 +- .../createunresolvedentities/Constants.java | 49 ------------------- .../PrepareBipFinder.java | 5 +- .../PrepareFOSSparkJob.java | 2 +- .../SparkSaveUnresolved.java | 2 +- 8 files changed, 14 insertions(+), 60 deletions(-) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/{bipfinder => common}/BipDeserialize.java (83%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/{bipfinder => common}/BipScore.java (84%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/{bipfinder => common}/Constants.java (96%) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index 8cda19d07..f3f1db222 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; -import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; +import static eu.dnetlib.dhp.actionmanager.common.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; @@ -16,16 +16,16 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.common.BipDeserialize; +import eu.dnetlib.dhp.actionmanager.common.BipScore; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.action.AtomicAction; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipDeserialize.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipDeserialize.java similarity index 83% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipDeserialize.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipDeserialize.java index d5b2ced7c..d29d3bdde 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipDeserialize.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipDeserialize.java @@ -1,11 +1,13 @@ -package eu.dnetlib.dhp.actionmanager.bipfinder; +package eu.dnetlib.dhp.actionmanager.common; import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import eu.dnetlib.dhp.actionmanager.bipfinder.Score; + /** * Class that maps the model of the bipFinder! input data. * Only needed for deserialization purposes diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipScore.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipScore.java similarity index 84% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipScore.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipScore.java index 247546694..6bfce2c0f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipScore.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipScore.java @@ -1,9 +1,11 @@ -package eu.dnetlib.dhp.actionmanager.bipfinder; +package eu.dnetlib.dhp.actionmanager.common; import java.io.Serializable; import java.util.List; +import eu.dnetlib.dhp.actionmanager.bipfinder.Score; + /** * Rewriting of the bipFinder input data by extracting the identifier of the result (doi) */ diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/Constants.java similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Constants.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/Constants.java index 0e08fd06c..e91d41438 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/Constants.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.bipfinder; +package eu.dnetlib.dhp.actionmanager.common; import java.util.Optional; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java deleted file mode 100644 index c508d4dbc..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java +++ /dev/null @@ -1,49 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.createunresolvedentities; - -import java.util.Optional; - -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SparkSession; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - -public class Constants { - - public static final String DOI = "doi"; - - public static final String UPDATE_DATA_INFO_TYPE = "update"; - public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos"; - public static final String UPDATE_CLASS_NAME = "Inferred by OpenAIRE"; - public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip"; - - public static final String FOS_CLASS_ID = "FOS"; - public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification"; - - public static final String NULL = "NULL"; - - public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - private Constants() { - } - - public static Boolean isSparkSessionManaged(ArgumentApplicationParser parser) { - return Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - } - - public static Dataset readPath( - SparkSession spark, String inputPath, Class clazz) { - return spark - .read() - .textFile(inputPath) - .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index 3d68db27b..c850012c7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -1,8 +1,8 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; -import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.UPDATE_CLASS_NAME; +import static eu.dnetlib.dhp.actionmanager.common.Constants.*; +import static eu.dnetlib.dhp.actionmanager.common.Constants.UPDATE_CLASS_NAME; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; @@ -11,7 +11,6 @@ import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.hdfs.client.HdfsUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 5ae2f8c88..5f750b025 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; +import static eu.dnetlib.dhp.actionmanager.common.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java index 62b813602..08e63ee1e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; +import static eu.dnetlib.dhp.actionmanager.common.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; From b5e11a3a0ad8e7133dd573044d87daa12dba90e5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Dec 2021 15:33:05 +0100 Subject: [PATCH 104/176] [BipFinder] put in common package BipFinder model --- .../bipfinder/SparkAtomicActionScoreJob.java | 6 ++-- .../model => bipmodel}/BipDeserialize.java | 2 +- .../model => bipmodel}/BipScore.java | 2 +- .../{common => bipmodel}/Constants.java | 2 +- .../{bipfinder => bipmodel}/KeyValue.java | 2 +- .../{bipfinder => bipmodel}/Score.java | 2 +- .../actionmanager/common/BipDeserialize.java | 30 ----------------- .../dhp/actionmanager/common/BipScore.java | 32 ------------------- .../PrepareBipFinder.java | 9 +++--- .../PrepareFOSSparkJob.java | 2 +- .../SparkSaveUnresolved.java | 2 +- .../model/KeyValue.java | 26 --------------- .../createunresolvedentities/model/Score.java | 30 ----------------- 13 files changed, 15 insertions(+), 132 deletions(-) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/{createunresolvedentities/model => bipmodel}/BipDeserialize.java (87%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/{createunresolvedentities/model => bipmodel}/BipScore.java (88%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/{common => bipmodel}/Constants.java (96%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/{bipfinder => bipmodel}/KeyValue.java (87%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/{bipfinder => bipmodel}/Score.java (89%) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipDeserialize.java delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipScore.java delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/KeyValue.java delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/Score.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index f3f1db222..fafa209ea 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; -import static eu.dnetlib.dhp.actionmanager.common.Constants.*; +import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; @@ -24,8 +24,8 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.common.BipDeserialize; -import eu.dnetlib.dhp.actionmanager.common.BipScore; +import eu.dnetlib.dhp.actionmanager.bipmodel.BipDeserialize; +import eu.dnetlib.dhp.actionmanager.bipmodel.BipScore; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.action.AtomicAction; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipDeserialize.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/BipDeserialize.java similarity index 87% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipDeserialize.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/BipDeserialize.java index f950d9260..a70bca618 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipDeserialize.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/BipDeserialize.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; +package eu.dnetlib.dhp.actionmanager.bipmodel; import java.io.Serializable; import java.util.ArrayList; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipScore.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/BipScore.java similarity index 88% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipScore.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/BipScore.java index c36856a5b..1ce20eaf4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipScore.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/BipScore.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; +package eu.dnetlib.dhp.actionmanager.bipmodel; import java.io.Serializable; import java.util.List; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/Constants.java similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/Constants.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/Constants.java index e91d41438..131d5fa07 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/Constants.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.common; +package eu.dnetlib.dhp.actionmanager.bipmodel; import java.util.Optional; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/KeyValue.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/KeyValue.java similarity index 87% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/KeyValue.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/KeyValue.java index 6909a9634..33efc8ea0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/KeyValue.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/KeyValue.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.bipfinder; +package eu.dnetlib.dhp.actionmanager.bipmodel; import java.io.Serializable; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Score.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/Score.java similarity index 89% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Score.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/Score.java index 7cc21b44d..5b3095cf2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Score.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/Score.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.bipfinder; +package eu.dnetlib.dhp.actionmanager.bipmodel; import java.io.Serializable; import java.util.List; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipDeserialize.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipDeserialize.java deleted file mode 100644 index d29d3bdde..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipDeserialize.java +++ /dev/null @@ -1,30 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.common; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - -import eu.dnetlib.dhp.actionmanager.bipfinder.Score; - -/** - * Class that maps the model of the bipFinder! input data. - * Only needed for deserialization purposes - */ - -public class BipDeserialize extends HashMap> implements Serializable { - - public BipDeserialize() { - super(); - } - - public List get(String key) { - - if (super.get(key) == null) { - return new ArrayList<>(); - } - return super.get(key); - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipScore.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipScore.java deleted file mode 100644 index 6bfce2c0f..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipScore.java +++ /dev/null @@ -1,32 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.common; - -import java.io.Serializable; -import java.util.List; - -import eu.dnetlib.dhp.actionmanager.bipfinder.Score; - -/** - * Rewriting of the bipFinder input data by extracting the identifier of the result (doi) - */ - -public class BipScore implements Serializable { - private String id; // doi - private List scoreList; // unit as given in the inputfile - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public List getScoreList() { - return scoreList; - } - - public void setScoreList(List scoreList) { - this.scoreList = scoreList; - } -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index c850012c7..c7049430e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -1,8 +1,8 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.common.Constants.*; -import static eu.dnetlib.dhp.actionmanager.common.Constants.UPDATE_CLASS_NAME; +import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.*; +import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.UPDATE_CLASS_NAME; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; @@ -23,8 +23,8 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.BipDeserialize; -import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.BipScore; +import eu.dnetlib.dhp.actionmanager.bipmodel.BipDeserialize; +import eu.dnetlib.dhp.actionmanager.bipmodel.BipScore; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.ModelConstants; @@ -88,6 +88,7 @@ public class PrepareBipFinder implements Serializable { BipScore bs = new BipScore(); bs.setId(key); bs.setScoreList(entry.get(key)); + return bs; }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)) .map((MapFunction) v -> { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 5f750b025..558f51f3f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.common.Constants.*; +import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java index 08e63ee1e..918f6a417 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.common.Constants.*; +import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/KeyValue.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/KeyValue.java deleted file mode 100644 index 4384e4ba1..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/KeyValue.java +++ /dev/null @@ -1,26 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; - -import java.io.Serializable; - -public class KeyValue implements Serializable { - - private String key; - private String value; - - public String getKey() { - return key; - } - - public void setKey(String key) { - this.key = key; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/Score.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/Score.java deleted file mode 100644 index 3d1cca9a0..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/Score.java +++ /dev/null @@ -1,30 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; - -import java.io.Serializable; -import java.util.List; - -/** - * represents the score in the input file - */ -public class Score implements Serializable { - - private String id; - private List unit; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public List getUnit() { - return unit; - } - - public void setUnit(List unit) { - this.unit = unit; - } -} From 0807fdb65adc2e07e8aafc9be8a90fc4083e4d92 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Dec 2021 15:37:00 +0100 Subject: [PATCH 105/176] [BipFinder] remove not needed resources --- .../SparkAtomicActionScoreJobTest.java | 4 - .../actionmanager/bipfinder/bip_scores.json | 1101 ----------------- .../actionmanager/bipfinder/publication.json | 18 - .../bipfinder/publication_2.json | 18 - .../bipfinder/publication_3.json | 18 - .../bipfinder/publicationoaid.json | 18 - 6 files changed, 1177 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publicationoaid.json diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java index c4a84cb46..be82b9fc3 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java @@ -74,10 +74,6 @@ public class SparkAtomicActionScoreJobTest { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores_oid.json") .getPath(); - String inputPath = getClass() - .getResource( - "/eu/dnetlib/dhp/actionmanager/bipfinder/publicationoaid.json") - .getPath(); SparkAtomicActionScoreJob .main( diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json deleted file mode 100644 index a4f3fa8b8..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json +++ /dev/null @@ -1,1101 +0,0 @@ -{"10.1001/10-v4n2-hsf10003": [{"id": "influence", "unit": [{"value": "1.62759106106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.22519296", "key": "score"}]}]} -{"10.1001/2012.jama.10456": [{"id": "influence", "unit": [{"value": "1.66083991022e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.8690688", "key": "score"}]}]} -{"10.1001/2013.jamadermatol.729": [{"id": "influence", "unit": [{"value": "2.0769133069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0279936", "key": "score"}]}]} -{"10.1001/archderm.107.6.853": [{"id": "influence", "unit": [{"value": "1.95361113607e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.600657255944", "key": "score"}]}]} -{"10.1001/archderm.1926.02370130020002": [{"id": "influence", "unit": [{"value": "1.48611319422e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.20903253662e-16", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.157": [{"id": "influence", "unit": [{"value": "1.42089114725e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1575936", "key": "score"}]}]} -{"10.1002/bjs.1800830139": [{"id": "influence", "unit": [{"value": "3.73834652128e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.73174997252", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.013": [{"id": "influence", "unit": [{"value": "1.54849335991e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4885963776", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.02.014": [{"id": "influence", "unit": [{"value": "1.43539656097e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.014": [{"id": "influence", "unit": [{"value": "1.65673923548e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.61188397056", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.015": [{"id": "influence", "unit": [{"value": "1.4250528394e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.05028397056", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.02.016": [{"id": "influence", "unit": [{"value": "1.41753303696e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.42336", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.004": [{"id": "influence", "unit": [{"value": "2.40827378083e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.3699383296", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.016": [{"id": "influence", "unit": [{"value": "2.54472083616e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.12517350811", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.03.005": [{"id": "influence", "unit": [{"value": "1.40543881698e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.005": [{"id": "influence", "unit": [{"value": "4.98246170342e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "13.4951290928", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.017": [{"id": "influence", "unit": [{"value": "2.8107768688e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.99055570262", "key": "score"}]}]} -{"10.1002/bjs.1800770731": [{"id": "influence", "unit": [{"value": "2.12334018404e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.37522926873", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.018": [{"id": "influence", "unit": [{"value": "2.56362457912e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.99687237632", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.006": [{"id": "influence", "unit": [{"value": "1.75711526235e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.125908992", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.019": [{"id": "influence", "unit": [{"value": "2.63339070632e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.83961125752", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.020": [{"id": "influence", "unit": [{"value": "2.55461990418e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.41411782656", "key": "score"}]}]} -{"10.1016/j.bspc.2011.05.010": [{"id": "influence", "unit": [{"value": "1.51285923901e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.254016", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.021": [{"id": "influence", "unit": [{"value": "1.6159596861e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.31560192", "key": "score"}]}]} -{"10.1016/j.bspc.2011.05.011": [{"id": "influence", "unit": [{"value": "1.48980937027e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.14121216", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.022": [{"id": "influence", "unit": [{"value": "1.9593105197e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.85463808", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.002": [{"id": "influence", "unit": [{"value": "1.87083337854e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.93569536", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.023": [{"id": "influence", "unit": [{"value": "1.50264494118e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.406656", "key": "score"}]}]} -{"10.1002/bjs.1800800828": [{"id": "influence", "unit": [{"value": "2.05276349756e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0877054147111", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.004": [{"id": "influence", "unit": [{"value": "1.53597044234e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.387108561584", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.024": [{"id": "influence", "unit": [{"value": "2.26725070012e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.761426539184", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.025": [{"id": "influence", "unit": [{"value": "1.77349231284e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.320827392", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.03.007": [{"id": "influence", "unit": [{"value": "1.40574735545e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.026": [{"id": "influence", "unit": [{"value": "1.65600720661e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3802496", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.027": [{"id": "influence", "unit": [{"value": "3.04379181834e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.424121856", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.03.008": [{"id": "influence", "unit": [{"value": "1.39240284872e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.028": [{"id": "influence", "unit": [{"value": "2.16540873578e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.321552896", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.03.011": [{"id": "influence", "unit": [{"value": "1.41632306281e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.029": [{"id": "influence", "unit": [{"value": "1.7539262291e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0624137673444", "key": "score"}]}]} -{"10.1002/bjs.1800830140": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.03.013": [{"id": "influence", "unit": [{"value": "1.45752396087e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.4352", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.007": [{"id": "influence", "unit": [{"value": "1.78985531064e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.847287296", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.03.022": [{"id": "influence", "unit": [{"value": "1.48785696778e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.46403656158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.030": [{"id": "influence", "unit": [{"value": "1.75473038943e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.428627456", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.031": [{"id": "influence", "unit": [{"value": "2.353279823e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7004429312", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.04.001": [{"id": "influence", "unit": [{"value": "1.48470666863e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.392", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.008": [{"id": "influence", "unit": [{"value": "2.31587469588e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.1074574336", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.032": [{"id": "influence", "unit": [{"value": "1.65229124421e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.331776", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.04.010": [{"id": "influence", "unit": [{"value": "1.44024892224e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.176", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.033": [{"id": "influence", "unit": [{"value": "1.9391730294e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1212379136", "key": "score"}]}]} -{"10.1002/bjs.1800770732": [{"id": "influence", "unit": [{"value": "3.20469067904e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.617849262205", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.034": [{"id": "influence", "unit": [{"value": "2.00336332378e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.5124096", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.05.002": [{"id": "influence", "unit": [{"value": "1.40497558269e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.009": [{"id": "influence", "unit": [{"value": "1.53536956023e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.11010816", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.035": [{"id": "influence", "unit": [{"value": "2.35484498246e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.63538354995", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.05.003": [{"id": "influence", "unit": [{"value": "1.42929749612e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.8592", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.010": [{"id": "influence", "unit": [{"value": "4.48210438085e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.83282617277", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.036": [{"id": "influence", "unit": [{"value": "2.40889388834e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.97399557531", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.037": [{"id": "influence", "unit": [{"value": "3.40892060939e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.24081647616", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.011": [{"id": "influence", "unit": [{"value": "2.86806610088e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.2752125952", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.05.005": [{"id": "influence", "unit": [{"value": "1.42998991144e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.792", "key": "score"}]}]} -{"10.1002/bjs.1800800829": [{"id": "influence", "unit": [{"value": "1.81725821605e-07", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.38541937564", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.039": [{"id": "influence", "unit": [{"value": "2.19678753647e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.1962923008", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.040": [{"id": "influence", "unit": [{"value": "1.96294134291e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.23938263518", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.06.001": [{"id": "influence", "unit": [{"value": "1.41235330998e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.041": [{"id": "influence", "unit": [{"value": "1.66591219918e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4099147776", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.042": [{"id": "influence", "unit": [{"value": "1.46410726228e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4380466176", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.012": [{"id": "influence", "unit": [{"value": "2.49597519002e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.3124057088", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.06.002": [{"id": "influence", "unit": [{"value": "1.40153855775e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.043": [{"id": "influence", "unit": [{"value": "1.42616308507e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0340402176", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.013": [{"id": "influence", "unit": [{"value": "2.11485065994e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.931594752", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.06.007": [{"id": "influence", "unit": [{"value": "1.46452413768e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1002/bjs.1800830142": [{"id": "influence", "unit": [{"value": "3.8525597719e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.550382071798", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.005": [{"id": "influence", "unit": [{"value": "2.3257723807e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.65705032158", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.07.004": [{"id": "influence", "unit": [{"value": "2.0769133069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.014": [{"id": "influence", "unit": [{"value": "1.5327892699e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3649536", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.006": [{"id": "influence", "unit": [{"value": "1.46835352817e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.07278336", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.07.005": [{"id": "influence", "unit": [{"value": "1.52733033069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.5456", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.007": [{"id": "influence", "unit": [{"value": "1.54120154122e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.862656", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.015": [{"id": "influence", "unit": [{"value": "1.42339402133e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0228427776", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.07.006": [{"id": "influence", "unit": [{"value": "1.3976712191e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.008": [{"id": "influence", "unit": [{"value": "2.26000243195e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.08396432317", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.07.010": [{"id": "influence", "unit": [{"value": "2.0769133069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.07776", "key": "score"}]}]} -{"10.1002/bjs.1800770733": [{"id": "influence", "unit": [{"value": "1.68884478719e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00502888168651", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.016": [{"id": "influence", "unit": [{"value": "5.90446436104e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "14.5386956015", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.009": [{"id": "influence", "unit": [{"value": "1.46983224999e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.16920576", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.07.012": [{"id": "influence", "unit": [{"value": "1.39860088294e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.010": [{"id": "influence", "unit": [{"value": "1.50642113061e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.42336", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.017": [{"id": "influence", "unit": [{"value": "1.59376657434e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.470071296", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.09.002": [{"id": "influence", "unit": [{"value": "1.46707085793e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.489636561584", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.011": [{"id": "influence", "unit": [{"value": "1.65681335047e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.55968912317", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.09.004": [{"id": "influence", "unit": [{"value": "1.44467832664e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.72", "key": "score"}]}]} -{"10.1016/j.bspc.2011.07.001": [{"id": "influence", "unit": [{"value": "2.89026709917e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.93443472317", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.018": [{"id": "influence", "unit": [{"value": "2.0375067469e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.94857216", "key": "score"}]}]} -{"10.1002/bjs.1800770734": [{"id": "influence", "unit": [{"value": "1.52306975838e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.68456532868e-07", "key": "score"}]}]} -{"10.1016/j.bspc.2011.07.003": [{"id": "influence", "unit": [{"value": "1.41364327772e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.bspc.2011.07.004": [{"id": "influence", "unit": [{"value": "2.21466751434e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.00473416158", "key": "score"}]}]} -{"10.1016/j.bspc.2011.07.007": [{"id": "influence", "unit": [{"value": "4.86853108806e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "30.2058513695", "key": "score"}]}]} -{"10.1016/j.bspc.2011.08.001": [{"id": "influence", "unit": [{"value": "1.40296984755e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2011.08.002": [{"id": "influence", "unit": [{"value": "1.46446008016e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.00478976", "key": "score"}]}]} -{"10.1016/j.bspc.2011.09.001": [{"id": "influence", "unit": [{"value": "1.50277913293e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.1456", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.09.007": [{"id": "influence", "unit": [{"value": "1.39125640401e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2011.09.002": [{"id": "influence", "unit": [{"value": "2.30880583754e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.13575936", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.09.011": [{"id": "influence", "unit": [{"value": "1.4875318134e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.945636561584", "key": "score"}]}]} -{"10.1016/j.bspc.2011.09.003": [{"id": "influence", "unit": [{"value": "1.49277898572e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.300416", "key": "score"}]}]} -{"10.1002/bjs.1800800830": [{"id": "influence", "unit": [{"value": "1.39160184535e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.09.012": [{"id": "influence", "unit": [{"value": "1.45120593182e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4752", "key": "score"}]}]} -{"10.1016/j.bspc.2011.09.004": [{"id": "influence", "unit": [{"value": "1.75648155571e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.75135816158", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.09.013": [{"id": "influence", "unit": [{"value": "1.42718144062e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.bspc.2011.10.001": [{"id": "influence", "unit": [{"value": "1.56917990837e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.50639616", "key": "score"}]}]} -{"10.1016/j.bspc.2011.10.002": [{"id": "influence", "unit": [{"value": "1.55073039343e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.9216", "key": "score"}]}]} -{"10.1016/j.bspc.2011.11.003": [{"id": "influence", "unit": [{"value": "4.08184163355e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "12.5074139232", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.045": [{"id": "influence", "unit": [{"value": "1.57148537666e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.26887458816", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.10.004": [{"id": "influence", "unit": [{"value": "1.40812470078e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2011.11.005": [{"id": "influence", "unit": [{"value": "1.4279434706e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4346496", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.10.006": [{"id": "influence", "unit": [{"value": "1.39407654571e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.1591": [{"id": "influence", "unit": [{"value": "1.43105534658e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.484416", "key": "score"}]}]} -{"10.1002/bjs.1800830145": [{"id": "influence", "unit": [{"value": "1.40455863858e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.01679616", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.046": [{"id": "influence", "unit": [{"value": "1.82938384104e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7020032", "key": "score"}]}]} -{"10.1016/j.bspc.2011.11.006": [{"id": "influence", "unit": [{"value": "1.45513920888e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.622656", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.10.013": [{"id": "influence", "unit": [{"value": "1.39559783055e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.047": [{"id": "influence", "unit": [{"value": "2.7682406054e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.77286771986", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.048": [{"id": "influence", "unit": [{"value": "1.68108910902e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.293796561584", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.10.016": [{"id": "influence", "unit": [{"value": "1.41132235292e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.bspc.2011.11.007": [{"id": "influence", "unit": [{"value": "1.45417934601e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.046656", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.049": [{"id": "influence", "unit": [{"value": "1.66245115877e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.87941210112", "key": "score"}]}]} -{"10.1016/j.bspc.2011.12.001": [{"id": "influence", "unit": [{"value": "1.44947873874e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0559872", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.10.018": [{"id": "influence", "unit": [{"value": "1.39642212834e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1002/bjs.1800830146": [{"id": "influence", "unit": [{"value": "2.13786944508e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.214116867573", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.051": [{"id": "influence", "unit": [{"value": "2.27454576941e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7399003136", "key": "score"}]}]} -{"10.1016/j.bspc.2012.01.001": [{"id": "influence", "unit": [{"value": "1.42867097637e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.2439936", "key": "score"}]}]} -{"10.1016/j.bspc.2012.01.002": [{"id": "influence", "unit": [{"value": "2.7419514496e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.99973192158", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.11.002": [{"id": "influence", "unit": [{"value": "1.43685773631e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.17603656158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.001": [{"id": "influence", "unit": [{"value": "2.15244546006e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.12111921152", "key": "score"}]}]} -{"10.1016/j.bspc.2012.02.001": [{"id": "influence", "unit": [{"value": "1.47022156874e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.176256", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.11.003": [{"id": "influence", "unit": [{"value": "1.45558088501e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.002": [{"id": "influence", "unit": [{"value": "1.47304155508e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.53865216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.02.002": [{"id": "influence", "unit": [{"value": "3.41579032033e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.98626376158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.003": [{"id": "influence", "unit": [{"value": "2.15231173511e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.741149696", "key": "score"}]}]} -{"10.1002/bjs.1800770735": [{"id": "influence", "unit": [{"value": "1.92149504752e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00363791484297", "key": "score"}]}]} -{"10.1016/j.bspc.2012.02.003": [{"id": "influence", "unit": [{"value": "1.52898970342e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.08024832", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.004": [{"id": "influence", "unit": [{"value": "1.77871461002e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.072256", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.11.004": [{"id": "influence", "unit": [{"value": "1.46366832841e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.03.002": [{"id": "influence", "unit": [{"value": "1.62278902873e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.830016", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.005": [{"id": "influence", "unit": [{"value": "2.09778019577e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.49759266816", "key": "score"}]}]} -{"10.1016/j.bspc.2012.03.003": [{"id": "influence", "unit": [{"value": "1.39933875286e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.11.005": [{"id": "influence", "unit": [{"value": "2.0769133069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.019": [{"id": "influence", "unit": [{"value": "4.93120641303e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "14.6419024992", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.006": [{"id": "influence", "unit": [{"value": "1.78927152939e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.5856256", "key": "score"}]}]} -{"10.1016/j.bspc.2012.03.004": [{"id": "influence", "unit": [{"value": "1.47189687444e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.96", "key": "score"}]}]} -{"10.1002/bjs.1800800831": [{"id": "influence", "unit": [{"value": "2.9648572286e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.000674059986411", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.11.006": [{"id": "influence", "unit": [{"value": "1.41067712469e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.007": [{"id": "influence", "unit": [{"value": "2.08658259817e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.74721816576", "key": "score"}]}]} -{"10.1016/j.bspc.2012.03.005": [{"id": "influence", "unit": [{"value": "1.75916854294e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.08054856158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.008": [{"id": "influence", "unit": [{"value": "1.43916267878e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1524096", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.020": [{"id": "influence", "unit": [{"value": "3.29252368896e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.52968049118", "key": "score"}]}]} -{"10.1016/j.bspc.2012.04.002": [{"id": "influence", "unit": [{"value": "1.68970062986e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.418176", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.009": [{"id": "influence", "unit": [{"value": "2.51299479608e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.586545152", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.11.008": [{"id": "influence", "unit": [{"value": "1.43655436141e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.04.003": [{"id": "influence", "unit": [{"value": "1.56910165297e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.3616", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.010": [{"id": "influence", "unit": [{"value": "2.50510350062e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.28562645402", "key": "score"}]}]} -{"10.1002/bjs.1800830149": [{"id": "influence", "unit": [{"value": "1.44996628301e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.31621703842e-05", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.022": [{"id": "influence", "unit": [{"value": "3.08555072694e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.24665806235", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.011": [{"id": "influence", "unit": [{"value": "4.71181245507e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.90610352813", "key": "score"}]}]} -{"10.1016/j.bspc.2012.04.004": [{"id": "influence", "unit": [{"value": "1.87118458967e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.484416", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.11.009": [{"id": "influence", "unit": [{"value": "1.40034306297e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.012": [{"id": "influence", "unit": [{"value": "1.6894598116e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.32608317918", "key": "score"}]}]} -{"10.1016/j.bspc.2012.04.005": [{"id": "influence", "unit": [{"value": "1.93019250357e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.3231232", "key": "score"}]}]} -{"10.1016/j.bspc.2012.04.006": [{"id": "influence", "unit": [{"value": "1.40590676851e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.046656", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.014": [{"id": "influence", "unit": [{"value": "3.18395908128e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.55022424952", "key": "score"}]}]} -{"10.1016/j.bspc.2012.04.007": [{"id": "influence", "unit": [{"value": "1.92454730741e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.0294656", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.016": [{"id": "influence", "unit": [{"value": "3.58531933966e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "11.7068543072", "key": "score"}]}]} -{"10.1002/bjs.1800830152": [{"id": "influence", "unit": [{"value": "1.39487844463e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0060466176", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.01.007": [{"id": "influence", "unit": [{"value": "1.41393736665e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816", "key": "score"}]}]} -{"10.1016/j.bspc.2012.05.001": [{"id": "influence", "unit": [{"value": "1.48065110517e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.37152", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.017": [{"id": "influence", "unit": [{"value": "2.63861213244e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.13082327518", "key": "score"}]}]} -{"10.1016/j.bspc.2012.05.002": [{"id": "influence", "unit": [{"value": "2.03880211876e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.94457232317", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.001": [{"id": "influence", "unit": [{"value": "2.09551529965e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.30398976", "key": "score"}]}]} -{"10.1016/j.bspc.2012.05.003": [{"id": "influence", "unit": [{"value": "1.45322684878e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1575936", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.01.008": [{"id": "influence", "unit": [{"value": "1.39660093165e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.05.004": [{"id": "influence", "unit": [{"value": "2.04168254788e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.34762496", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.002": [{"id": "influence", "unit": [{"value": "1.40331980195e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.01.014": [{"id": "influence", "unit": [{"value": "1.423113652e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.432", "key": "score"}]}]} -{"10.1002/bjs.1800770736": [{"id": "influence", "unit": [{"value": "1.66561348149e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.006719779", "key": "score"}]}]} -{"10.1016/j.bspc.2012.05.005": [{"id": "influence", "unit": [{"value": "1.88489488647e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.332288", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.003": [{"id": "influence", "unit": [{"value": "1.54367231064e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.08445256158", "key": "score"}]}]} -{"10.1016/j.bspc.2012.05.007": [{"id": "influence", "unit": [{"value": "1.60626139405e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7726464", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.004": [{"id": "influence", "unit": [{"value": "1.91627023547e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.8644352", "key": "score"}]}]} -{"10.1016/j.bspc.2012.06.001": [{"id": "influence", "unit": [{"value": "1.94373623456e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.7065472", "key": "score"}]}]} -{"10.1016/j.bspc.2012.06.002": [{"id": "influence", "unit": [{"value": "1.47337150931e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.63936", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.005": [{"id": "influence", "unit": [{"value": "2.09473054887e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3023142912", "key": "score"}]}]} -{"10.1016/j.bspc.2012.06.003": [{"id": "influence", "unit": [{"value": "1.66051940624e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.56172616158", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.02.002": [{"id": "influence", "unit": [{"value": "2.0769133069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.006": [{"id": "influence", "unit": [{"value": "4.48020877024e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.26474456954", "key": "score"}]}]} -{"10.1002/bjs.1800830155": [{"id": "influence", "unit": [{"value": "2.43949008345e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.89965759807", "key": "score"}]}]} -{"10.1016/j.bspc.2012.06.004": [{"id": "influence", "unit": [{"value": "1.67071735997e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.3098496", "key": "score"}]}]} -{"10.1016/j.bspc.2012.08.002": [{"id": "influence", "unit": [{"value": "1.51021836218e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.0879872", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.007": [{"id": "influence", "unit": [{"value": "2.91136190037e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.68252848128", "key": "score"}]}]} -{"10.1016/j.bspc.2012.08.003": [{"id": "influence", "unit": [{"value": "2.35071170617e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.40720456158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.008": [{"id": "influence", "unit": [{"value": "1.59654929626e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.849636561584", "key": "score"}]}]} -{"10.1016/j.bspc.2012.08.004": [{"id": "influence", "unit": [{"value": "2.47183104353e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.93606032317", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.02.014": [{"id": "influence", "unit": [{"value": "1.43621870424e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3056", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.009": [{"id": "influence", "unit": [{"value": "2.51885939553e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.8166272", "key": "score"}]}]} -{"10.1016/j.bspc.2012.08.005": [{"id": "influence", "unit": [{"value": "2.60852588989e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.87046216158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.010": [{"id": "influence", "unit": [{"value": "2.09644082764e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.89427596152", "key": "score"}]}]} -{"10.1002/bjs.1800800833": [{"id": "influence", "unit": [{"value": "2.55234515315e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.214631171353", "key": "score"}]}]} -{"10.1016/j.bspc.2012.08.006": [{"id": "influence", "unit": [{"value": "1.41592740525e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.72", "key": "score"}]}]} -{"10.1016/j.bspc.2012.08.007": [{"id": "influence", "unit": [{"value": "1.60931157026e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.645156561584", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.011": [{"id": "influence", "unit": [{"value": "1.9171976596e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1630923776", "key": "score"}]}]} -{"10.1016/j.bspc.2012.08.008": [{"id": "influence", "unit": [{"value": "1.77151253655e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.18646856158", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.02.016": [{"id": "influence", "unit": [{"value": "1.41123181198e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.bspc.2012.09.002": [{"id": "influence", "unit": [{"value": "1.81080733373e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.804416", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.012": [{"id": "influence", "unit": [{"value": "1.78400697147e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.339024896", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.014": [{"id": "influence", "unit": [{"value": "1.86722652606e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.861257216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.09.003": [{"id": "influence", "unit": [{"value": "1.85721878597e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.54819656158", "key": "score"}]}]} -{"10.1016/j.bspc.2012.10.001": [{"id": "influence", "unit": [{"value": "1.56093891548e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.392256", "key": "score"}]}]} -{"10.1002/bjs.1800830202": [{"id": "influence", "unit": [{"value": "1.858607198e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0787562838158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.015": [{"id": "influence", "unit": [{"value": "1.44367537251e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.124416", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.06.001": [{"id": "influence", "unit": [{"value": "1.4408438485e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.046656", "key": "score"}]}]} -{"10.1016/j.bspc.2012.10.002": [{"id": "influence", "unit": [{"value": "1.42914282325e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3879936", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.06.002": [{"id": "influence", "unit": [{"value": "1.79318290607e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.31881612152", "key": "score"}]}]} -{"10.1016/j.bspc.2012.10.003": [{"id": "influence", "unit": [{"value": "1.43405918808e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.07.002": [{"id": "influence", "unit": [{"value": "2.63480635463e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.20876449042", "key": "score"}]}]} -{"10.1016/j.bspc.2012.10.004": [{"id": "influence", "unit": [{"value": "1.51600991431e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.03776", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.08.001": [{"id": "influence", "unit": [{"value": "3.41561621069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.48469276469", "key": "score"}]}]} -{"10.1016/j.bspc.2012.10.005": [{"id": "influence", "unit": [{"value": "2.0954843164e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.350272", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.08.002": [{"id": "influence", "unit": [{"value": "2.03007024589e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.20105998814", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.1638": [{"id": "influence", "unit": [{"value": "1.97048204379e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.02954056158", "key": "score"}]}]} -{"10.1002/bjs.1800770737": [{"id": "influence", "unit": [{"value": "1.43454963257e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.14094221446e-07", "key": "score"}]}]} -{"10.1016/j.bspc.2012.10.006": [{"id": "influence", "unit": [{"value": "1.58802929789e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.892224", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.08.003": [{"id": "influence", "unit": [{"value": "1.96098591035e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.00031232", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.08.004": [{"id": "influence", "unit": [{"value": "1.94968738424e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.34653229056", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.03.007": [{"id": "influence", "unit": [{"value": "1.44873112468e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.10.007": [{"id": "influence", "unit": [{"value": "1.41075381717e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.56736", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.08.005": [{"id": "influence", "unit": [{"value": "1.58344035352e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.066811392", "key": "score"}]}]} -{"10.1016/j.bspc.2012.11.003": [{"id": "influence", "unit": [{"value": "1.98011585561e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.9927936", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.023": [{"id": "influence", "unit": [{"value": "1.45383931482e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.010077696", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.03.012": [{"id": "influence", "unit": [{"value": "1.4670945341e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816", "key": "score"}]}]} -{"10.1016/j.bspc.2012.11.004": [{"id": "influence", "unit": [{"value": "1.40719075877e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1002/bjs.1800830203": [{"id": "influence", "unit": [{"value": "2.0770745324e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.000183555403284", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.08.006": [{"id": "influence", "unit": [{"value": "1.41271524791e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.010077696", "key": "score"}]}]} -{"10.1016/j.bspc.2012.11.005": [{"id": "influence", "unit": [{"value": "1.69216745379e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.222656", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.024": [{"id": "influence", "unit": [{"value": "3.08536854974e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.67624991677", "key": "score"}]}]} -{"10.1016/j.bspc.2012.11.006": [{"id": "influence", "unit": [{"value": "1.48567657775e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.778176", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.09.001": [{"id": "influence", "unit": [{"value": "1.57686722593e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.9684096", "key": "score"}]}]} -{"10.1016/j.bspc.2012.11.007": [{"id": "influence", "unit": [{"value": "1.44264027248e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.432", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.03.013": [{"id": "influence", "unit": [{"value": "1.39260374604e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.025": [{"id": "influence", "unit": [{"value": "2.37603130176e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.37308795358", "key": "score"}]}]} -{"10.1016/j.bspc.2012.11.008": [{"id": "influence", "unit": [{"value": "2.06935444063e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.8096256", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.09.002": [{"id": "influence", "unit": [{"value": "2.29069082446e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.76957126656", "key": "score"}]}]} -{"10.1002/bjs.1800770738": [{"id": "influence", "unit": [{"value": "1.7500071181e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.000320992486649", "key": "score"}]}]} -{"10.1016/j.bspc.2012.11.009": [{"id": "influence", "unit": [{"value": "1.50593182995e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0777965615844", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.09.003": [{"id": "influence", "unit": [{"value": "2.25729707375e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.06345216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.12.001": [{"id": "influence", "unit": [{"value": "1.47509940962e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.992256", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.001": [{"id": "influence", "unit": [{"value": "2.41308058053e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.4571136", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.001": [{"id": "influence", "unit": [{"value": "2.10088642476e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.29884027358", "key": "score"}]}]} -{"10.1016/j.bspc.2012.12.003": [{"id": "influence", "unit": [{"value": "1.45828465076e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.262692561584", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.002": [{"id": "influence", "unit": [{"value": "3.90076920919e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.00985654067", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.04.003": [{"id": "influence", "unit": [{"value": "1.45606959808e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.792", "key": "score"}]}]} -{"10.1016/j.bspc.2012.12.004": [{"id": "influence", "unit": [{"value": "1.43571558404e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.67776", "key": "score"}]}]} -{"10.1016/j.bspc.2012.12.005": [{"id": "influence", "unit": [{"value": "1.50594954341e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1002/bjs.1800800837": [{"id": "influence", "unit": [{"value": "6.24597092541e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.733724383441", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.003": [{"id": "influence", "unit": [{"value": "2.10065608511e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.23003830069", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.002": [{"id": "influence", "unit": [{"value": "1.6486496091e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.177417216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.12.006": [{"id": "influence", "unit": [{"value": "1.45961810886e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.51552", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.004": [{"id": "influence", "unit": [{"value": "1.57516928853e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.656733696", "key": "score"}]}]} -{"10.1016/j.bspc.2013.01.001": [{"id": "influence", "unit": [{"value": "2.24775304269e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.82166856158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.005": [{"id": "influence", "unit": [{"value": "3.44497115805e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.3739594752", "key": "score"}]}]} -{"10.1016/j.bspc.2013.01.003": [{"id": "influence", "unit": [{"value": "1.70979424538e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.284032", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.04.005": [{"id": "influence", "unit": [{"value": "1.42169219577e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.72", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.003": [{"id": "influence", "unit": [{"value": "2.61737931603e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.36019960152", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.006": [{"id": "influence", "unit": [{"value": "1.81186047743e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.87449928158", "key": "score"}]}]} -{"10.1002/bjs.1800830204": [{"id": "influence", "unit": [{"value": "2.60412862308e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.481741015127", "key": "score"}]}]} -{"10.1016/j.bspc.2013.01.004": [{"id": "influence", "unit": [{"value": "1.47720057206e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.300416", "key": "score"}]}]} -{"10.1016/j.bspc.2013.01.005": [{"id": "influence", "unit": [{"value": "4.26903097374e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "21.8027611232", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.009": [{"id": "influence", "unit": [{"value": "2.23307511579e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.990199296", "key": "score"}]}]} -{"10.1016/j.bspc.2013.01.007": [{"id": "influence", "unit": [{"value": "1.73383624405e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.96765256158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.010": [{"id": "influence", "unit": [{"value": "1.96084984716e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.47321133056", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.004": [{"id": "influence", "unit": [{"value": "2.10585129583e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.332727296", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.05.004": [{"id": "influence", "unit": [{"value": "1.39926815551e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2013.01.008": [{"id": "influence", "unit": [{"value": "1.42307022879e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.012": [{"id": "influence", "unit": [{"value": "2.16767541345e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.09906493918", "key": "score"}]}]} -{"10.1016/j.bspc.2013.01.009": [{"id": "influence", "unit": [{"value": "1.42648139061e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.10976", "key": "score"}]}]} -{"10.1002/bjs.1800770742": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.013": [{"id": "influence", "unit": [{"value": "1.5868275828e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.940416", "key": "score"}]}]} -{"10.1016/j.bspc.2013.02.001": [{"id": "influence", "unit": [{"value": "1.51359710756e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.097856", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.014": [{"id": "influence", "unit": [{"value": "1.75997908697e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.65738797056", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.005": [{"id": "influence", "unit": [{"value": "1.73561874023e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.38471936", "key": "score"}]}]} -{"10.1016/j.bspc.2013.02.002": [{"id": "influence", "unit": [{"value": "1.46010837206e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.56736", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.015": [{"id": "influence", "unit": [{"value": "3.73714413721e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.2046204505", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.05.008": [{"id": "influence", "unit": [{"value": "1.43489715264e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2013.02.003": [{"id": "influence", "unit": [{"value": "2.20181050117e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.129782807922", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.016": [{"id": "influence", "unit": [{"value": "1.5096982362e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.01679616", "key": "score"}]}]} -{"10.1016/j.bspc.2013.02.004": [{"id": "influence", "unit": [{"value": "1.48751300871e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.562176", "key": "score"}]}]} -{"10.1002/bjs.1800830205": [{"id": "influence", "unit": [{"value": "2.07546745762e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.389780541338", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.06.004": [{"id": "influence", "unit": [{"value": "1.42154378584e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bspc.2013.02.006": [{"id": "influence", "unit": [{"value": "1.92612889953e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.736576", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.017": [{"id": "influence", "unit": [{"value": "5.87547960299e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "12.8464802707", "key": "score"}]}]} -{"10.1016/j.bspc.2013.02.007": [{"id": "influence", "unit": [{"value": "1.69981358024e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.08381256158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.018": [{"id": "influence", "unit": [{"value": "3.93068999929e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.06270442429", "key": "score"}]}]} -{"10.1016/j.bspc.2013.03.001": [{"id": "influence", "unit": [{"value": "1.59080169945e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.838656", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.019": [{"id": "influence", "unit": [{"value": "2.93890424354e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.794432721584", "key": "score"}]}]} -{"10.1016/j.bspc.2013.04.001": [{"id": "influence", "unit": [{"value": "1.41632660599e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.15552", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.07.002": [{"id": "influence", "unit": [{"value": "1.7538379036e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.020": [{"id": "influence", "unit": [{"value": "2.05213687439e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.6966651747", "key": "score"}]}]} -{"10.1002/bjs.1800770743": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.bspc.2013.04.002": [{"id": "influence", "unit": [{"value": "1.46668954781e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.8592", "key": "score"}]}]} -{"10.1016/j.bspc.2013.04.003": [{"id": "influence", "unit": [{"value": "2.04158706672e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.28099656158", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.08.002": [{"id": "influence", "unit": [{"value": "1.41989530088e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.021": [{"id": "influence", "unit": [{"value": "1.43581558559e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.020155392", "key": "score"}]}]} -{"10.1016/j.bspc.2013.04.004": [{"id": "influence", "unit": [{"value": "1.59322968531e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.5072", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.022": [{"id": "influence", "unit": [{"value": "2.53870392939e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.380812288", "key": "score"}]}]} -{"10.1016/j.bspc.2013.04.005": [{"id": "influence", "unit": [{"value": "1.63957594784e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.446016", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.023": [{"id": "influence", "unit": [{"value": "2.04162205464e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.14829122493", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.09.007": [{"id": "influence", "unit": [{"value": "1.47964959347e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.08", "key": "score"}]}]} -{"10.1016/j.bspc.2013.04.006": [{"id": "influence", "unit": [{"value": "1.45680104502e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.96", "key": "score"}]}]} -{"10.1002/bjs.1800830206": [{"id": "influence", "unit": [{"value": "4.7755259417e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.75292801941", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.024": [{"id": "influence", "unit": [{"value": "1.54217630592e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.132627456", "key": "score"}]}]} -{"10.1016/j.bspc.2013.04.007": [{"id": "influence", "unit": [{"value": "1.40656153948e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.001": [{"id": "influence", "unit": [{"value": "1.58954288535e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.226400182272", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.001": [{"id": "influence", "unit": [{"value": "1.79480243584e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.08135312317", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.002": [{"id": "influence", "unit": [{"value": "1.90218857374e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.13314502656", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.002": [{"id": "influence", "unit": [{"value": "1.51741394277e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.622656", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.003": [{"id": "influence", "unit": [{"value": "1.5043342575e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.56736", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.003": [{"id": "influence", "unit": [{"value": "1.7649818355e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.947037696", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.004": [{"id": "influence", "unit": [{"value": "1.54003126301e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3284096", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.09.017": [{"id": "influence", "unit": [{"value": "1.39858392433e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1002/bjs.1800770802": [{"id": "influence", "unit": [{"value": "1.45011339633e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00366490060093", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.005": [{"id": "influence", "unit": [{"value": "1.45092474662e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.8439936", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.004": [{"id": "influence", "unit": [{"value": "1.82109019622e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.692416", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.006": [{"id": "influence", "unit": [{"value": "1.6345833814e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.02976", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.007": [{"id": "influence", "unit": [{"value": "1.54062730593e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.536", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.09.019": [{"id": "influence", "unit": [{"value": "1.42045726031e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.008": [{"id": "influence", "unit": [{"value": "1.41288622828e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7296", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.009": [{"id": "influence", "unit": [{"value": "1.73146483971e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.320192", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.010": [{"id": "influence", "unit": [{"value": "2.67760514906e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.183296", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.09.020": [{"id": "influence", "unit": [{"value": "1.4535188035e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.96", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.011": [{"id": "influence", "unit": [{"value": "1.41922409304e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.164": [{"id": "influence", "unit": [{"value": "1.7956661054e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.4537344", "key": "score"}]}]} -{"10.1002/bjs.1800830207": [{"id": "influence", "unit": [{"value": "4.32253172896e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.76958348313", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.001": [{"id": "influence", "unit": [{"value": "1.76383175573e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.96861256158", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.002": [{"id": "influence", "unit": [{"value": "1.41096202364e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.003": [{"id": "influence", "unit": [{"value": "1.41108227319e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.43776", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.10.014": [{"id": "influence", "unit": [{"value": "1.45665273718e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.792", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.004": [{"id": "influence", "unit": [{"value": "2.38261952892e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.7201536", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.005": [{"id": "influence", "unit": [{"value": "2.21563430941e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.779712", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.006": [{"id": "influence", "unit": [{"value": "1.73040878704e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.933156561584", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.10.016": [{"id": "influence", "unit": [{"value": "1.43358943959e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.006": [{"id": "influence", "unit": [{"value": "5.37226124464e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "11.7435994617", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.007": [{"id": "influence", "unit": [{"value": "1.48555122279e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.27776", "key": "score"}]}]} -{"10.1002/bjs.1800770803": [{"id": "influence", "unit": [{"value": "1.91665335176e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0807617024201", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.006": [{"id": "influence", "unit": [{"value": "2.03315475884e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.07008", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.008": [{"id": "influence", "unit": [{"value": "1.59922182052e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.78976", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.007": [{"id": "influence", "unit": [{"value": "5.54734257544e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.97335423933", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.10.017": [{"id": "influence", "unit": [{"value": "1.40149434529e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.009": [{"id": "influence", "unit": [{"value": "1.48209655985e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.622656", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.010": [{"id": "influence", "unit": [{"value": "1.42292725345e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.007": [{"id": "influence", "unit": [{"value": "1.57140606031e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.50121216", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.008": [{"id": "influence", "unit": [{"value": "2.06004951422e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.33518811546", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.011": [{"id": "influence", "unit": [{"value": "2.05131160693e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.35766856158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.009": [{"id": "influence", "unit": [{"value": "2.14117908561e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.03087548416", "key": "score"}]}]} -{"10.1002/bjs.1800830208": [{"id": "influence", "unit": [{"value": "3.20505216178e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.60325951815", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.11.006": [{"id": "influence", "unit": [{"value": "1.64070418842e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.552", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.009": [{"id": "influence", "unit": [{"value": "1.60503395919e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.585939456", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.012": [{"id": "influence", "unit": [{"value": "1.46380890355e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.124416", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.010": [{"id": "influence", "unit": [{"value": "1.78447836614e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.434909696", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.014": [{"id": "influence", "unit": [{"value": "2.10439938784e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.41984", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.002": [{"id": "influence", "unit": [{"value": "1.47791433203e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.171072", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.011": [{"id": "influence", "unit": [{"value": "1.51450599992e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.021149724672", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.010": [{"id": "influence", "unit": [{"value": "1.61980833189e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.450077696", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.11.007": [{"id": "influence", "unit": [{"value": "2.20029297808e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.776", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.012": [{"id": "influence", "unit": [{"value": "1.4807537002e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1213056", "key": "score"}]}]} -{"10.1002/bjs.1800770804": [{"id": "influence", "unit": [{"value": "2.69852546285e-07", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.6426677993", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.003": [{"id": "influence", "unit": [{"value": "1.47877912317e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.0752", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.013": [{"id": "influence", "unit": [{"value": "1.74377162081e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.902528387089", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.004": [{"id": "influence", "unit": [{"value": "1.5601937833e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.52736", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.011": [{"id": "influence", "unit": [{"value": "1.82275547777e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.992983761584", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.005": [{"id": "influence", "unit": [{"value": "1.71330840977e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.03741256158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.014": [{"id": "influence", "unit": [{"value": "2.05927561501e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.0968395776", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.11.021": [{"id": "influence", "unit": [{"value": "1.43846583976e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576036561584", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.006": [{"id": "influence", "unit": [{"value": "1.86722755857e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.948608", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.015": [{"id": "influence", "unit": [{"value": "2.0221560342e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.79577928158", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.12.002": [{"id": "influence", "unit": [{"value": "1.50003547797e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.600036561584", "key": "score"}]}]} -{"10.1002/bjs.1800830209": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.012": [{"id": "influence", "unit": [{"value": "1.51949775879e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.65279232", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.009": [{"id": "influence", "unit": [{"value": "1.47460748411e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.489636561584", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.016": [{"id": "influence", "unit": [{"value": "1.76021627338e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.204256301744", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.010": [{"id": "influence", "unit": [{"value": "1.69820215509e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.44896", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.017": [{"id": "influence", "unit": [{"value": "1.69487650784e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.459116752896", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.011": [{"id": "influence", "unit": [{"value": "1.98600655464e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.71143312317", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.013": [{"id": "influence", "unit": [{"value": "1.62681987218e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.686011392", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.012": [{"id": "influence", "unit": [{"value": "1.42747769246e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.29376", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.12.001": [{"id": "influence", "unit": [{"value": "1.88543041649e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.310252032", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.12.005": [{"id": "influence", "unit": [{"value": "1.4000325672e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816", "key": "score"}]}]} -{"10.1002/bjs.1800770805": [{"id": "influence", "unit": [{"value": "3.95911883883e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.130740273398", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.12.003": [{"id": "influence", "unit": [{"value": "2.82814827126e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.99266043291", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.014": [{"id": "influence", "unit": [{"value": "4.80336134235e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.5715934399", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.001": [{"id": "influence", "unit": [{"value": "1.49317096206e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.12.004": [{"id": "influence", "unit": [{"value": "1.70126097738e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.832160875184", "key": "score"}]}]} -{"10.1016/j.bulcan.2017.01.008": [{"id": "influence", "unit": [{"value": "1.3891346097e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.003": [{"id": "influence", "unit": [{"value": "1.48610121089e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.03779656158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.12.006": [{"id": "influence", "unit": [{"value": "2.25782736868e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.61282376158", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.004": [{"id": "influence", "unit": [{"value": "1.47377372924e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4752", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.12.007": [{"id": "influence", "unit": [{"value": "2.09474263951e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.96689593277", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.015": [{"id": "influence", "unit": [{"value": "1.61635696414e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.782393856", "key": "score"}]}]} -{"10.1002/bjs.1800830210": [{"id": "influence", "unit": [{"value": "2.11466703865e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.51222298843", "key": "score"}]}]} -{"10.1016/j.bulcan.2017.01.010": [{"id": "influence", "unit": [{"value": "1.44925719265e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.32", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.12.008": [{"id": "influence", "unit": [{"value": "2.71276906539e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.33538263518", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.006": [{"id": "influence", "unit": [{"value": "2.54979930121e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "12.358528", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.017": [{"id": "influence", "unit": [{"value": "1.48833157595e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.093312", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.007": [{"id": "influence", "unit": [{"value": "1.49683898057e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.171072", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.12.009": [{"id": "influence", "unit": [{"value": "1.61364350622e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.212497617584", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.008": [{"id": "influence", "unit": [{"value": "1.79526562745e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.265216", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.001": [{"id": "influence", "unit": [{"value": "1.80129550723e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.5878656", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.009": [{"id": "influence", "unit": [{"value": "1.60124235618e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.65312", "key": "score"}]}]} -{"10.1016/j.bulcan.2017.02.003": [{"id": "influence", "unit": [{"value": "1.56392297095e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.49603656158", "key": "score"}]}]} -{"10.1002/bjs.1800770806": [{"id": "influence", "unit": [{"value": "4.19961167825e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.336593663636", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.010": [{"id": "influence", "unit": [{"value": "1.54227647079e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.387108561584", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.011": [{"id": "influence", "unit": [{"value": "1.42174301871e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.406656", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.012": [{"id": "influence", "unit": [{"value": "1.71620737386e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.73703312317", "key": "score"}]}]} -{"10.1016/j.bspc.2013.09.001": [{"id": "influence", "unit": [{"value": "1.97476892991e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.08288", "key": "score"}]}]} -{"10.1016/j.bulcan.2017.03.014": [{"id": "influence", "unit": [{"value": "1.40737074166e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.002": [{"id": "influence", "unit": [{"value": "2.75902099141e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.34757192158", "key": "score"}]}]} -{"10.1016/j.bspc.2013.09.002": [{"id": "influence", "unit": [{"value": "1.4221541576e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0777965615844", "key": "score"}]}]} -{"10.1016/j.bspc.2013.09.006": [{"id": "influence", "unit": [{"value": "1.53714684338e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.691849123169", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.003": [{"id": "influence", "unit": [{"value": "2.74075214023e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.10542459358", "key": "score"}]}]} -{"10.1016/j.bspc.2013.09.007": [{"id": "influence", "unit": [{"value": "1.40484945831e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1002/bjs.1800830212": [{"id": "influence", "unit": [{"value": "2.66969531353e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.775297283766", "key": "score"}]}]} -{"10.1016/j.bspc.2013.09.008": [{"id": "influence", "unit": [{"value": "1.53719196499e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.59229256158", "key": "score"}]}]} -{"10.1016/j.bulcan.2017.06.014": [{"id": "influence", "unit": [{"value": "2.0769133069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.001": [{"id": "influence", "unit": [{"value": "1.57961160131e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.54045256158", "key": "score"}]}]} -{"10.1016/j.bulm.2003.06.001": [{"id": "influence", "unit": [{"value": "1.80732611524e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.29635246542", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.003": [{"id": "influence", "unit": [{"value": "1.47190380983e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.004": [{"id": "influence", "unit": [{"value": "1.430203191e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.005": [{"id": "influence", "unit": [{"value": "1.60126571041e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.74336", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.001": [{"id": "influence", "unit": [{"value": "1.49168734938e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.006": [{"id": "influence", "unit": [{"value": "1.50298327317e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.92003656158", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.007": [{"id": "influence", "unit": [{"value": "2.62308960492e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.820352", "key": "score"}]}]} -{"10.1002/bjs.1800770808": [{"id": "influence", "unit": [{"value": "3.62464274217e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.250905373926", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.008": [{"id": "influence", "unit": [{"value": "1.47031938238e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.003": [{"id": "influence", "unit": [{"value": "1.53172771927e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.05670576033", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.009": [{"id": "influence", "unit": [{"value": "1.58684001141e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.176", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.001": [{"id": "influence", "unit": [{"value": "3.39234937342e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.59072", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.002": [{"id": "influence", "unit": [{"value": "1.71916767108e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.58336", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.004": [{"id": "influence", "unit": [{"value": "2.14182522117e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.712082843704", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.003": [{"id": "influence", "unit": [{"value": "1.5445932303e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.792036561584", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.005": [{"id": "influence", "unit": [{"value": "1.44920401063e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.29376", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.006": [{"id": "influence", "unit": [{"value": "2.09740130553e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.01219656158", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.008": [{"id": "influence", "unit": [{"value": "1.55587287007e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.78336", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.170": [{"id": "influence", "unit": [{"value": "1.92783349862e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.30695824317", "key": "score"}]}]} -{"10.1002/bjs.1800830213": [{"id": "influence", "unit": [{"value": "1.9228154406e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0186312116195", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.009": [{"id": "influence", "unit": [{"value": "1.47562713863e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.484452561584", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.010": [{"id": "influence", "unit": [{"value": "1.69559094393e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.49536", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.005": [{"id": "influence", "unit": [{"value": "1.70752904263e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.495345007", "key": "score"}]}]} -{"10.1016/j.bspc.2013.12.001": [{"id": "influence", "unit": [{"value": "1.45869463639e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.86976", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.006": [{"id": "influence", "unit": [{"value": "1.90665348612e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.11658592037", "key": "score"}]}]} -{"10.1016/j.bspc.2013.12.003": [{"id": "influence", "unit": [{"value": "2.36070212814e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.41216", "key": "score"}]}]} -{"10.1016/j.bspc.2013.12.005": [{"id": "influence", "unit": [{"value": "1.52177833636e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.46656", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.004": [{"id": "influence", "unit": [{"value": "1.62691418254e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.6938843136", "key": "score"}]}]} -{"10.1016/j.bspc.2013.12.007": [{"id": "influence", "unit": [{"value": "1.60055584054e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.881929123169", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.007": [{"id": "influence", "unit": [{"value": "3.0909151793e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.21294559475", "key": "score"}]}]} -{"10.1002/bjs.1800770809": [{"id": "influence", "unit": [{"value": "2.17559933322e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0565278437647", "key": "score"}]}]} -{"10.1016/j.bspc.2013.12.009": [{"id": "influence", "unit": [{"value": "2.01097818689e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.4624", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.001": [{"id": "influence", "unit": [{"value": "1.4652504107e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.50976", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.004": [{"id": "influence", "unit": [{"value": "1.53686766653e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.574016", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.005": [{"id": "influence", "unit": [{"value": "1.72482229454e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.784249856", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.005": [{"id": "influence", "unit": [{"value": "1.43713770834e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.008": [{"id": "influence", "unit": [{"value": "1.46156247007e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.399847550386", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.007": [{"id": "influence", "unit": [{"value": "1.59720123874e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.792256", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.008": [{"id": "influence", "unit": [{"value": "1.84986898731e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.70339656158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.009": [{"id": "influence", "unit": [{"value": "2.33910266535e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.847232", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.006": [{"id": "influence", "unit": [{"value": "1.51993130901e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.152446161584", "key": "score"}]}]} -{"10.1002/bjs.1800830214": [{"id": "influence", "unit": [{"value": "2.10522763705e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.03109834889", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.010": [{"id": "influence", "unit": [{"value": "1.79772261204e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.613376", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.009": [{"id": "influence", "unit": [{"value": "2.60879631462e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.69399400721", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.011": [{"id": "influence", "unit": [{"value": "1.66223463553e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.85376", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.012": [{"id": "influence", "unit": [{"value": "1.4595335457e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.37152", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.007": [{"id": "influence", "unit": [{"value": "1.76240621174e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.0560256", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.001": [{"id": "influence", "unit": [{"value": "1.85982964893e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.057472", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.002": [{"id": "influence", "unit": [{"value": "1.66924539165e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.0496", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.010": [{"id": "influence", "unit": [{"value": "6.75628008372e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.31071340444", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.005": [{"id": "influence", "unit": [{"value": "1.66228182194e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.2752", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.011": [{"id": "influence", "unit": [{"value": "1.76003945695e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.112066832532", "key": "score"}]}]} -{"10.1002/bjs.1800770810": [{"id": "influence", "unit": [{"value": "2.89020671133e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00151463222663", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.008": [{"id": "influence", "unit": [{"value": "1.43008843857e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0777965615844", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.009": [{"id": "influence", "unit": [{"value": "1.92105415314e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.53059656158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.010": [{"id": "influence", "unit": [{"value": "1.76155880783e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.63399312317", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.012": [{"id": "influence", "unit": [{"value": "1.68809250653e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.942568402374", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.011": [{"id": "influence", "unit": [{"value": "1.63678945608e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.47296", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.012": [{"id": "influence", "unit": [{"value": "1.56715319777e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.928256", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.013": [{"id": "influence", "unit": [{"value": "1.6610562052e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.370599478788", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.014": [{"id": "influence", "unit": [{"value": "1.58564858536e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.361306069402", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.015": [{"id": "influence", "unit": [{"value": "1.67918091154e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.293599066522", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.001": [{"id": "influence", "unit": [{"value": "1.6178572876e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.95936", "key": "score"}]}]} -{"10.1002/bjs.1800830215": [{"id": "influence", "unit": [{"value": "3.95872687348e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.119697056722", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.016": [{"id": "influence", "unit": [{"value": "2.07426602679e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.71532561397", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.004": [{"id": "influence", "unit": [{"value": "3.0367948727e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.6020388079", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.005": [{"id": "influence", "unit": [{"value": "1.64722280811e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.568", "key": "score"}]}]} -{"10.1016/j.bulm.2003.09.001": [{"id": "influence", "unit": [{"value": "1.67859186006e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.120013982761", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.006": [{"id": "influence", "unit": [{"value": "1.6067774989e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.43555656158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.007": [{"id": "influence", "unit": [{"value": "2.53637875633e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "11.0832", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.008": [{"id": "influence", "unit": [{"value": "1.56942794029e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.86589256158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.009": [{"id": "influence", "unit": [{"value": "1.4479145491e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.bulm.2003.09.002": [{"id": "influence", "unit": [{"value": "1.67656432427e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.215988055179", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.010": [{"id": "influence", "unit": [{"value": "1.8198075692e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.49699656158", "key": "score"}]}]} -{"10.1002/bjs.1800770811": [{"id": "influence", "unit": [{"value": "1.67187553969e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0695100183041", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.012": [{"id": "influence", "unit": [{"value": "1.95532453508e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.515593123169", "key": "score"}]}]} -{"10.1016/j.bulm.2003.09.003": [{"id": "influence", "unit": [{"value": "1.40775765812e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0013060694016", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.013": [{"id": "influence", "unit": [{"value": "1.5060128486e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.08963656158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.001": [{"id": "influence", "unit": [{"value": "1.71744317531e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.18819656158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.003": [{"id": "influence", "unit": [{"value": "1.70700554472e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.432036561584", "key": "score"}]}]} -{"10.1016/j.bulm.2003.09.005": [{"id": "influence", "unit": [{"value": "1.60913042425e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.141247514051", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.004": [{"id": "influence", "unit": [{"value": "1.54004068867e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.176256", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.005": [{"id": "influence", "unit": [{"value": "1.43786305861e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.008": [{"id": "influence", "unit": [{"value": "1.59868004966e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.bulm.2003.09.006": [{"id": "influence", "unit": [{"value": "1.51352700749e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.047998630986", "key": "score"}]}]} -{"10.1002/bjs.1800830216": [{"id": "influence", "unit": [{"value": "4.19695308756e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.892091916084", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.009": [{"id": "influence", "unit": [{"value": "1.44773774163e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.43776", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.010": [{"id": "influence", "unit": [{"value": "1.40286074715e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.011": [{"id": "influence", "unit": [{"value": "1.43452379454e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.20736", "key": "score"}]}]} -{"10.1016/j.bulm.2003.09.008": [{"id": "influence", "unit": [{"value": "1.47851454712e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.265595636751", "key": "score"}]}]} -{"10.1016/j.bspc.2014.05.001": [{"id": "influence", "unit": [{"value": "1.53184569597e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.14752", "key": "score"}]}]} -{"10.1016/j.bspc.2014.05.002": [{"id": "influence", "unit": [{"value": "2.08976321636e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.77312", "key": "score"}]}]} -{"10.1016/j.bspc.2014.05.003": [{"id": "influence", "unit": [{"value": "1.5362319803e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.4592", "key": "score"}]}]} -{"10.1016/j.bspc.2014.05.004": [{"id": "influence", "unit": [{"value": "1.43551360917e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bulm.2003.09.009": [{"id": "influence", "unit": [{"value": "2.07662142151e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.17324559877", "key": "score"}]}]} -{"10.1016/j.bspc.2014.05.006": [{"id": "influence", "unit": [{"value": "1.38758095618e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1002/bjs.1800770813": [{"id": "influence", "unit": [{"value": "5.43028907368e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.43228064039", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.008": [{"id": "influence", "unit": [{"value": "2.48008956791e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.92696023518", "key": "score"}]}]} -{"10.1016/j.bspc.2014.05.007": [{"id": "influence", "unit": [{"value": "1.55569630314e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1616", "key": "score"}]}]} -{"10.1016/j.bspc.2014.06.003": [{"id": "influence", "unit": [{"value": "1.47652834699e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.2048", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.001": [{"id": "influence", "unit": [{"value": "2.34941079685e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.64104412729", "key": "score"}]}]} -{"10.1016/j.bspc.2014.06.005": [{"id": "influence", "unit": [{"value": "1.46581848803e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.bspc.2014.06.008": [{"id": "influence", "unit": [{"value": "1.48316418026e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.152", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.009": [{"id": "influence", "unit": [{"value": "1.55159297928e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3684096", "key": "score"}]}]} -{"10.1016/j.bspc.2014.06.009": [{"id": "influence", "unit": [{"value": "4.64407583043e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "25.0781621774", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.002": [{"id": "influence", "unit": [{"value": "1.84359097451e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.97892349444", "key": "score"}]}]} -{"10.1016/j.bspc.2014.06.010": [{"id": "influence", "unit": [{"value": "1.45882281845e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.29376", "key": "score"}]}]} -{"10.1002/bjs.1800830217": [{"id": "influence", "unit": [{"value": "2.66857838618e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.2939630017", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.001": [{"id": "influence", "unit": [{"value": "1.74339843714e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.47296", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.002": [{"id": "influence", "unit": [{"value": "1.97864177754e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.8432", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.003": [{"id": "influence", "unit": [{"value": "1.43205516236e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.008223399936", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.010": [{"id": "influence", "unit": [{"value": "1.65211302766e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7844096", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.003": [{"id": "influence", "unit": [{"value": "1.70791309048e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.1648", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.005": [{"id": "influence", "unit": [{"value": "1.46148049345e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.32003656158", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.004": [{"id": "influence", "unit": [{"value": "2.41893049083e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.14408257536", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.004": [{"id": "influence", "unit": [{"value": "1.48940446704e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.349264532144", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.006": [{"id": "influence", "unit": [{"value": "1.46865948655e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7776", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.012": [{"id": "influence", "unit": [{"value": "3.32098166992e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.85721559518", "key": "score"}]}]} -{"10.1002/bjs.1800800838": [{"id": "influence", "unit": [{"value": "1.54541422129e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.614175851545", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.007": [{"id": "influence", "unit": [{"value": "1.86833555051e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.17539656158", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.006": [{"id": "influence", "unit": [{"value": "1.43480222367e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.010861337641", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.013": [{"id": "influence", "unit": [{"value": "1.73995286966e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.2633472", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.009": [{"id": "influence", "unit": [{"value": "1.40352714524e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.600073123169", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.014": [{"id": "influence", "unit": [{"value": "1.49761635864e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.525312", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.011": [{"id": "influence", "unit": [{"value": "1.82224540298e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.38312224634", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.005": [{"id": "influence", "unit": [{"value": "1.45660433811e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.176256", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.012": [{"id": "influence", "unit": [{"value": "1.500939189e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.152", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.013": [{"id": "influence", "unit": [{"value": "1.45016686891e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.0896", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.006": [{"id": "influence", "unit": [{"value": "3.83122757192e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.90968338432", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.175": [{"id": "influence", "unit": [{"value": "1.62468100526e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.193216", "key": "score"}]}]} -{"10.1002/bjs.1800770814": [{"id": "influence", "unit": [{"value": "1.56862848446e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.19369506404e-05", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.014": [{"id": "influence", "unit": [{"value": "1.51143633444e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.12736", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.001": [{"id": "influence", "unit": [{"value": "1.70044408816e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.496832", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.004": [{"id": "influence", "unit": [{"value": "1.71093837745e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.56323656158", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.008": [{"id": "influence", "unit": [{"value": "1.97078859418e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.73177645056", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.005": [{"id": "influence", "unit": [{"value": "1.49780415935e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.96", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.006": [{"id": "influence", "unit": [{"value": "1.43503066091e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.009": [{"id": "influence", "unit": [{"value": "1.61204862976e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.6192", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.007": [{"id": "influence", "unit": [{"value": "1.62852962137e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.912996561584", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.010": [{"id": "influence", "unit": [{"value": "1.99662853046e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.311823872", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.009": [{"id": "influence", "unit": [{"value": "1.41070820507e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1002/bjs.1800830218": [{"id": "influence", "unit": [{"value": "7.96286510199e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.519748681", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.010": [{"id": "influence", "unit": [{"value": "1.5802843193e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.96167312317", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.012": [{"id": "influence", "unit": [{"value": "3.840869763e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.41320899448", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.011": [{"id": "influence", "unit": [{"value": "1.42104594681e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.007": [{"id": "influence", "unit": [{"value": "1.44230090756e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0060466176", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.013": [{"id": "influence", "unit": [{"value": "1.5863498967e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.14336", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.014": [{"id": "influence", "unit": [{"value": "2.31232021273e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "12.3952", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.013": [{"id": "influence", "unit": [{"value": "1.84917821836e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.62628397056", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.002": [{"id": "influence", "unit": [{"value": "1.48687602794e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.392", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.003": [{"id": "influence", "unit": [{"value": "1.41684524348e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.008": [{"id": "influence", "unit": [{"value": "1.63027919676e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0571237025385", "key": "score"}]}]} -{"10.1002/bjs.1800770815": [{"id": "influence", "unit": [{"value": "3.36222157107e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.136808818018", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.004": [{"id": "influence", "unit": [{"value": "1.95535934787e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.44099656158", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.014": [{"id": "influence", "unit": [{"value": "3.69147584024e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.98048985088", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.005": [{"id": "influence", "unit": [{"value": "1.39845495997e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.cad.2007.07.002": [{"id": "influence", "unit": [{"value": "1.80513074722e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.02760913374", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.006": [{"id": "influence", "unit": [{"value": "2.03447659693e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.09565256158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.007": [{"id": "influence", "unit": [{"value": "1.62927318462e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.61856", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.008": [{"id": "influence", "unit": [{"value": "1.54581024603e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.93696", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.009": [{"id": "influence", "unit": [{"value": "1.46435880325e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.8352", "key": "score"}]}]} -{"10.1016/j.cad.2007.07.003": [{"id": "influence", "unit": [{"value": "2.16395647262e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3727835136", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.010": [{"id": "influence", "unit": [{"value": "1.46868368579e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1002/bjs.1800830219": [{"id": "influence", "unit": [{"value": "2.50642815265e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0178183460684", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.011": [{"id": "influence", "unit": [{"value": "1.40482513888e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.cad.2007.07.004": [{"id": "influence", "unit": [{"value": "1.66018178917e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.248957035184", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.012": [{"id": "influence", "unit": [{"value": "1.44499446681e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.32963656158", "key": "score"}]}]} -{"10.1016/j.cad.2007.07.005": [{"id": "influence", "unit": [{"value": "2.13345493462e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.08107354112", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.013": [{"id": "influence", "unit": [{"value": "1.76733860818e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.11136", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.015": [{"id": "influence", "unit": [{"value": "1.67138620507e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.04576", "key": "score"}]}]} -{"10.1016/j.cad.2007.07.006": [{"id": "influence", "unit": [{"value": "3.44677020463e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.7206677347", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.002": [{"id": "influence", "unit": [{"value": "1.53338439518e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.489636561584", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.003": [{"id": "influence", "unit": [{"value": "1.4975981112e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.1696", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.005": [{"id": "influence", "unit": [{"value": "1.54039426793e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.89383312317", "key": "score"}]}]} -{"10.1002/bjs.1800800839": [{"id": "influence", "unit": [{"value": "1.91873295174e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48228758095e-05", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.007": [{"id": "influence", "unit": [{"value": "1.47931059859e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.008": [{"id": "influence", "unit": [{"value": "2.01423051188e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.19054624634", "key": "score"}]}]} -{"10.1016/j.cad.2007.08.001": [{"id": "influence", "unit": [{"value": "1.68693618434e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.3580466176", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.009": [{"id": "influence", "unit": [{"value": "1.72914219669e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.9568", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.015": [{"id": "influence", "unit": [{"value": "1.73360913543e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.93641216", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.011": [{"id": "influence", "unit": [{"value": "1.43679218811e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.20736", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.012": [{"id": "influence", "unit": [{"value": "1.78061002271e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.70816", "key": "score"}]}]} -{"10.1016/j.cad.2007.08.002": [{"id": "influence", "unit": [{"value": "1.61820004971e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.45097294234", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.013": [{"id": "influence", "unit": [{"value": "1.48765552864e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.96", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.001": [{"id": "influence", "unit": [{"value": "1.59737661395e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.05696", "key": "score"}]}]} -{"10.1002/bjs.1800830220": [{"id": "influence", "unit": [{"value": "3.8815829655e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.16238449899", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.016": [{"id": "influence", "unit": [{"value": "2.78066873764e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7056", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.002": [{"id": "influence", "unit": [{"value": "1.45383931482e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.003": [{"id": "influence", "unit": [{"value": "1.90267173877e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.55363656158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.004": [{"id": "influence", "unit": [{"value": "1.50843971772e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.859236561584", "key": "score"}]}]} -{"10.1016/j.cad.2007.08.003": [{"id": "influence", "unit": [{"value": "1.49853112689e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.152200912896", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.005": [{"id": "influence", "unit": [{"value": "1.52771917375e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3488", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.008": [{"id": "influence", "unit": [{"value": "1.45847695161e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.cad.2007.08.004": [{"id": "influence", "unit": [{"value": "1.42787789848e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.009": [{"id": "influence", "unit": [{"value": "2.10890729982e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.62816", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.010": [{"id": "influence", "unit": [{"value": "1.57175886062e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.7216", "key": "score"}]}]} -{"10.1002/bjs.1800770816": [{"id": "influence", "unit": [{"value": "2.71163748513e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.774674369216", "key": "score"}]}]} -{"10.1016/j.cad.2007.08.005": [{"id": "influence", "unit": [{"value": "1.65151993295e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.14121216", "key": "score"}]}]} -{"10.1016/j.bspc.2014.12.003": [{"id": "influence", "unit": [{"value": "1.52570480424e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.032", "key": "score"}]}]} -{"10.1016/j.cad.2007.08.006": [{"id": "influence", "unit": [{"value": "1.45383931482e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0279936", "key": "score"}]}]} -{"10.1016/j.bspc.2014.12.005": [{"id": "influence", "unit": [{"value": "2.71337763508e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "11.55936", "key": "score"}]}]} -{"10.1016/j.bspc.2014.12.006": [{"id": "influence", "unit": [{"value": "1.47106510598e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.936", "key": "score"}]}]} -{"10.1016/j.cad.2007.08.007": [{"id": "influence", "unit": [{"value": "1.53777238981e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.06857312256", "key": "score"}]}]} -{"10.1016/j.bspc.2014.12.007": [{"id": "influence", "unit": [{"value": "1.40080939677e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2014.12.008": [{"id": "influence", "unit": [{"value": "1.89034298199e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.9216", "key": "score"}]}]} -{"10.1016/j.bspc.2014.12.012": [{"id": "influence", "unit": [{"value": "1.66752560965e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.31936", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.001": [{"id": "influence", "unit": [{"value": "1.46425603437e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.936036561584", "key": "score"}]}]} -{"10.1002/bjs.1800800840": [{"id": "influence", "unit": [{"value": "3.67966558346e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.40760305102", "key": "score"}]}]} -{"10.1016/j.cad.2007.09.002": [{"id": "influence", "unit": [{"value": "1.46236770279e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.002176782336", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.002": [{"id": "influence", "unit": [{"value": "2.23567019421e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.30403656158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.003": [{"id": "influence", "unit": [{"value": "1.47732821531e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.004": [{"id": "influence", "unit": [{"value": "1.4209459997e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7296", "key": "score"}]}]} -{"10.1016/j.cad.2007.09.003": [{"id": "influence", "unit": [{"value": "2.71062666759e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.07416229888", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.005": [{"id": "influence", "unit": [{"value": "3.62317493769e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.3009096848", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.007": [{"id": "influence", "unit": [{"value": "1.91660266579e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.64576", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.008": [{"id": "influence", "unit": [{"value": "1.54764640563e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.392", "key": "score"}]}]} -{"10.1016/j.cad.2007.09.004": [{"id": "influence", "unit": [{"value": "1.53695181735e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0527026176", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.009": [{"id": "influence", "unit": [{"value": "1.47238808768e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.960036561584", "key": "score"}]}]} -{"10.1002/bjs.1800830221": [{"id": "influence", "unit": [{"value": "1.91362377636e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.850333866433", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.010": [{"id": "influence", "unit": [{"value": "1.41383950298e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.002": [{"id": "influence", "unit": [{"value": "1.77651789595e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.6436096", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.002": [{"id": "influence", "unit": [{"value": "1.5588275929e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.39776", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.009": [{"id": "influence", "unit": [{"value": "1.620251802e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.121944990745", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.003": [{"id": "influence", "unit": [{"value": "1.61509969244e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.008", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.003": [{"id": "influence", "unit": [{"value": "1.51109944205e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.004353564672", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.005": [{"id": "influence", "unit": [{"value": "1.61390186863e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.2752", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.010": [{"id": "influence", "unit": [{"value": "2.34179271519e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.87600155892", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.004": [{"id": "influence", "unit": [{"value": "1.75892873601e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.291443982336", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.011": [{"id": "influence", "unit": [{"value": "1.95530022024e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.08646015871", "key": "score"}]}]} -{"10.1002/bjs.1800830222": [{"id": "influence", "unit": [{"value": "1.53993358438e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.046656", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.005": [{"id": "influence", "unit": [{"value": "2.03453090989e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.6413616128", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.006": [{"id": "influence", "unit": [{"value": "1.52048096498e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0609140736", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.001": [{"id": "influence", "unit": [{"value": "1.62846212143e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.741786539105", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.002": [{"id": "influence", "unit": [{"value": "8.49962253632e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "23.8113807648", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.007": [{"id": "influence", "unit": [{"value": "1.77202790365e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.41948928", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.008": [{"id": "influence", "unit": [{"value": "1.66784168106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.72154447872", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.003": [{"id": "influence", "unit": [{"value": "1.91796802049e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.568225265648", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.009": [{"id": "influence", "unit": [{"value": "2.44964116737e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.53599721472", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.007": [{"id": "influence", "unit": [{"value": "1.40307547394e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.004": [{"id": "influence", "unit": [{"value": "1.66189630404e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.883877179392", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.1900": [{"id": "influence", "unit": [{"value": "1.41563040465e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.406656", "key": "score"}]}]} -{"10.1002/bjs.1800770817": [{"id": "influence", "unit": [{"value": "2.3414812571e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0769975141418", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.010": [{"id": "influence", "unit": [{"value": "1.5758423276e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.2102962176", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.008": [{"id": "influence", "unit": [{"value": "1.48556993479e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.009": [{"id": "influence", "unit": [{"value": "2.44107151341e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.08323656158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.011": [{"id": "influence", "unit": [{"value": "1.5461901653e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.98336", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.011": [{"id": "influence", "unit": [{"value": "1.56002105062e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.2387803136", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.012": [{"id": "influence", "unit": [{"value": "3.32215062786e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "15.2602696848", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.005": [{"id": "influence", "unit": [{"value": "1.52580829037e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0655418710426", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.017": [{"id": "influence", "unit": [{"value": "2.83150482952e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.18112656317", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.014": [{"id": "influence", "unit": [{"value": "1.52509378424e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.43786968475", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.012": [{"id": "influence", "unit": [{"value": "2.21262421374e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.43258413056", "key": "score"}]}]} -{"10.1002/bjs.1800830223": [{"id": "influence", "unit": [{"value": "1.61300371139e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0535883391846", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.001": [{"id": "influence", "unit": [{"value": "1.50323968183e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.43776", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.006": [{"id": "influence", "unit": [{"value": "1.79329991872e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.22408158121", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.018": [{"id": "influence", "unit": [{"value": "1.76307398045e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.44637696", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.002": [{"id": "influence", "unit": [{"value": "1.56246121871e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.9248", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.003": [{"id": "influence", "unit": [{"value": "1.4724851416e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.013": [{"id": "influence", "unit": [{"value": "2.37373458092e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.60366727168", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.004": [{"id": "influence", "unit": [{"value": "1.63634379092e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.9712", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.007": [{"id": "influence", "unit": [{"value": "1.73920228056e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.77834429874", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.005": [{"id": "influence", "unit": [{"value": "1.44059659429e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.019": [{"id": "influence", "unit": [{"value": "3.3773334495e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.60914034075", "key": "score"}]}]} -{"10.1002/bjs.1800800841": [{"id": "influence", "unit": [{"value": "3.8141878057e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.5575088231", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.014": [{"id": "influence", "unit": [{"value": "2.91623266242e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.58448456158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.006": [{"id": "influence", "unit": [{"value": "1.66124365782e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.3488", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.007": [{"id": "influence", "unit": [{"value": "1.51852019221e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.936036561584", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.001": [{"id": "influence", "unit": [{"value": "1.60137140701e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.223242448896", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.008": [{"id": "influence", "unit": [{"value": "1.53291950533e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.008": [{"id": "influence", "unit": [{"value": "1.90840217859e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.24232596567", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.021": [{"id": "influence", "unit": [{"value": "3.02280680133e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.009505792", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.009": [{"id": "influence", "unit": [{"value": "1.54105672704e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.92", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.011": [{"id": "influence", "unit": [{"value": "1.42787789848e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.003": [{"id": "influence", "unit": [{"value": "1.95168880935e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.12353159168", "key": "score"}]}]} -{"10.1002/bjs.1800830224": [{"id": "influence", "unit": [{"value": "1.71803713801e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.078672354467", "key": "score"}]}]} -{"10.1016/j.bulm.2003.12.001": [{"id": "influence", "unit": [{"value": "1.51853931787e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.13221580699", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.001": [{"id": "influence", "unit": [{"value": "1.61872367055e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.864", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.022": [{"id": "influence", "unit": [{"value": "2.32774512842e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.97683016158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.002": [{"id": "influence", "unit": [{"value": "1.5366499674e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.19939656158", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.004": [{"id": "influence", "unit": [{"value": "2.73418800808e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.24225581534", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.003": [{"id": "influence", "unit": [{"value": "1.51579974618e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3056", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.023": [{"id": "influence", "unit": [{"value": "1.59696268486e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.814327296", "key": "score"}]}]} -{"10.1016/j.bulm.2003.12.002": [{"id": "influence", "unit": [{"value": "1.61076734804e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.348928244122", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.004": [{"id": "influence", "unit": [{"value": "2.01234339106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.6544", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.005": [{"id": "influence", "unit": [{"value": "3.32115282661e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.32880456158", "key": "score"}]}]} -{"10.1002/bjs.1800770818": [{"id": "influence", "unit": [{"value": "5.68780647438e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1338624097", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.005": [{"id": "influence", "unit": [{"value": "1.59419814162e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.0656", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.024": [{"id": "influence", "unit": [{"value": "1.76193844694e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.793627857584", "key": "score"}]}]} -{"10.1016/j.bulm.2003.12.003": [{"id": "influence", "unit": [{"value": "1.57560737594e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0326025021358", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.006": [{"id": "influence", "unit": [{"value": "1.42561558413e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.489636561584", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.006": [{"id": "influence", "unit": [{"value": "1.52287071881e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.79776", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.007": [{"id": "influence", "unit": [{"value": "1.50665203883e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.38336", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.025": [{"id": "influence", "unit": [{"value": "1.71087944907e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.08354376158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.008": [{"id": "influence", "unit": [{"value": "1.46951625461e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.648036561584", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.007": [{"id": "influence", "unit": [{"value": "2.32915485084e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.3229169705", "key": "score"}]}]} -{"10.1016/j.bulm.2003.12.004": [{"id": "influence", "unit": [{"value": "2.36578941835e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.53322412737", "key": "score"}]}]} -{"10.1002/bjs.1800830225": [{"id": "influence", "unit": [{"value": "1.62214790144e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00742227195463", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.009": [{"id": "influence", "unit": [{"value": "1.56048905668e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.416", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.001": [{"id": "influence", "unit": [{"value": "1.48348846065e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.202873856", "key": "score"}]}]} -{"10.1016/j.bulm.2003.12.005": [{"id": "influence", "unit": [{"value": "2.30836821787e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.60349958079", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.008": [{"id": "influence", "unit": [{"value": "1.70229830807e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.84174336", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.010": [{"id": "influence", "unit": [{"value": "1.59830110366e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.63779656158", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.002": [{"id": "influence", "unit": [{"value": "1.93327034893e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.56053985758", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.009": [{"id": "influence", "unit": [{"value": "1.91549602574e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.36680192", "key": "score"}]}]} -{"10.1016/j.bulm.2004.01.001": [{"id": "influence", "unit": [{"value": "1.45033751388e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.02378336256", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.012": [{"id": "influence", "unit": [{"value": "1.41990928722e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.013": [{"id": "influence", "unit": [{"value": "1.41756265041e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1002/bjs.1800770820": [{"id": "influence", "unit": [{"value": "3.02434333285e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.498025265422", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.014": [{"id": "influence", "unit": [{"value": "2.87658919937e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.1904", "key": "score"}]}]} -{"10.1016/j.bulm.2004.01.002": [{"id": "influence", "unit": [{"value": "1.98851018342e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.05410185468", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.010": [{"id": "influence", "unit": [{"value": "3.31135702336e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.38206042112", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.004": [{"id": "influence", "unit": [{"value": "1.93237516379e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.268672", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.015": [{"id": "influence", "unit": [{"value": "1.5940790328e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7136", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.016": [{"id": "influence", "unit": [{"value": "1.42020748002e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bulm.2004.01.003": [{"id": "influence", "unit": [{"value": "1.57227629941e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.443832547738", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.011": [{"id": "influence", "unit": [{"value": "1.66228240489e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.55587117056", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.001": [{"id": "influence", "unit": [{"value": "1.74463551248e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.3856", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.002": [{"id": "influence", "unit": [{"value": "1.68432360071e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.0976", "key": "score"}]}]} -{"10.1002/bjs.1800830226": [{"id": "influence", "unit": [{"value": "1.41634774979e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.89730223054e-06", "key": "score"}]}]} -{"10.1016/j.bulm.2004.01.004": [{"id": "influence", "unit": [{"value": "1.51058289962e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.059532876841", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.003": [{"id": "influence", "unit": [{"value": "1.41460571706e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.cad.2007.12.001": [{"id": "influence", "unit": [{"value": "2.11112110818e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.79394605056", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.004": [{"id": "influence", "unit": [{"value": "1.51932372739e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.921636561584", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.005": [{"id": "influence", "unit": [{"value": "1.54252858635e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.68256", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.006": [{"id": "influence", "unit": [{"value": "1.63811523553e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.46730968475", "key": "score"}]}]} -{"10.1016/j.bulm.2004.01.005": [{"id": "influence", "unit": [{"value": "1.79905502333e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4531700736", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.007": [{"id": "influence", "unit": [{"value": "1.86500741891e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.9392", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.008": [{"id": "influence", "unit": [{"value": "1.95218690528e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.87203656158", "key": "score"}]}]} -{"10.1016/j.bulm.2004.01.006": [{"id": "influence", "unit": [{"value": "2.11480233946e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.09623861953", "key": "score"}]}]} -{"10.1002/bjs.1800800842": [{"id": "influence", "unit": [{"value": "3.19483524181e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.815376816948", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.011": [{"id": "influence", "unit": [{"value": "1.65687177507e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.95939656158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.012": [{"id": "influence", "unit": [{"value": "1.81235414386e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.14407312317", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.013": [{"id": "influence", "unit": [{"value": "1.58278476219e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.9056", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.014": [{"id": "influence", "unit": [{"value": "2.49273287575e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.1731565616", "key": "score"}]}]} -{"10.1016/j.bulm.2004.02.001": [{"id": "influence", "unit": [{"value": "7.17973794162e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "19.0059453231", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.015": [{"id": "influence", "unit": [{"value": "1.44480835961e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.15552", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.001": [{"id": "influence", "unit": [{"value": "2.03198801445e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.15207312317", "key": "score"}]}]} -{"10.1016/j.bulm.2004.02.002": [{"id": "influence", "unit": [{"value": "1.52151117534e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1213056", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.002": [{"id": "influence", "unit": [{"value": "1.47038759381e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.003": [{"id": "influence", "unit": [{"value": "1.68691283611e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.0496", "key": "score"}]}]} -{"10.1002/bjs.1800830228": [{"id": "influence", "unit": [{"value": "3.4727344925e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.52941705521", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.004": [{"id": "influence", "unit": [{"value": "1.50566905194e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.32003656158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.008": [{"id": "influence", "unit": [{"value": "1.49587208412e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.08", "key": "score"}]}]} -{"10.1016/j.bulm.2004.02.003": [{"id": "influence", "unit": [{"value": "1.47097833801e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00891997028352", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.009": [{"id": "influence", "unit": [{"value": "1.7180686851e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.31367312317", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.012": [{"id": "influence", "unit": [{"value": "1.59031474173e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.392", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.014": [{"id": "influence", "unit": [{"value": "1.46562526844e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.56003656158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.07.001": [{"id": "influence", "unit": [{"value": "2.17323207482e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.032", "key": "score"}]}]} -{"10.1016/j.bulm.2004.02.004": [{"id": "influence", "unit": [{"value": "1.44142980291e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.002176782336", "key": "score"}]}]} -{"10.1016/j.bspc.2015.07.002": [{"id": "influence", "unit": [{"value": "1.62780811555e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7376", "key": "score"}]}]} -{"10.1016/j.bspc.2015.07.003": [{"id": "influence", "unit": [{"value": "1.49732150645e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.03779656158", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.1907": [{"id": "influence", "unit": [{"value": "1.44064265002e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.23328", "key": "score"}]}]} -{"10.1002/bjs.1800770822": [{"id": "influence", "unit": [{"value": "2.53706984013e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.16406102839", "key": "score"}]}]} -{"10.1016/j.bspc.2015.07.004": [{"id": "influence", "unit": [{"value": "2.03252752514e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.23683656158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.07.006": [{"id": "influence", "unit": [{"value": "1.41406819359e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.31231688013e-05", "key": "score"}]}]} -{"10.1016/j.bspc.2015.07.007": [{"id": "influence", "unit": [{"value": "1.4134548894e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.bulm.2004.02.005": [{"id": "influence", "unit": [{"value": "1.92096996879e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.798504402748", "key": "score"}]}]} -{"10.1016/j.bspc.2015.08.001": [{"id": "influence", "unit": [{"value": "1.52571226108e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.5792", "key": "score"}]}]} -{"10.1016/j.bspc.2015.08.002": [{"id": "influence", "unit": [{"value": "1.92313873921e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.216", "key": "score"}]}]} -{"10.1016/j.cad.2007.12.003": [{"id": "influence", "unit": [{"value": "1.47833902387e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.478656", "key": "score"}]}]} -{"10.1016/j.bulm.2004.02.006": [{"id": "influence", "unit": [{"value": "1.99942885863e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.58838221914", "key": "score"}]}]} -{"10.1016/j.bspc.2015.08.003": [{"id": "influence", "unit": [{"value": "1.45383931482e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2015.08.004": [{"id": "influence", "unit": [{"value": "1.74136707644e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.82727312317", "key": "score"}]}]} -{"10.1002/bjs.1800800843": [{"id": "influence", "unit": [{"value": "1.90295946113e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0467163704882", "key": "score"}]}]} -{"10.1016/j.bspc.2015.08.005": [{"id": "influence", "unit": [{"value": "1.66535805466e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.4976", "key": "score"}]}]} -{"10.1016/j.bspc.2015.08.006": [{"id": "influence", "unit": [{"value": "1.55929432612e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.032", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.001": [{"id": "influence", "unit": [{"value": "1.67638603843e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.335936546243", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.002": [{"id": "influence", "unit": [{"value": "2.54499796895e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.95687312317", "key": "score"}]}]} -{"10.1016/j.cad.2007.12.004": [{"id": "influence", "unit": [{"value": "1.68358939362e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.181941313536", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.003": [{"id": "influence", "unit": [{"value": "2.03781614756e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.25603656158", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.002": [{"id": "influence", "unit": [{"value": "3.20375349871e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.16853148467", "key": "score"}]}]} -{"10.1016/j.cad.2007.12.006": [{"id": "influence", "unit": [{"value": "1.58270393634e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.988509696", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.004": [{"id": "influence", "unit": [{"value": "1.59625617331e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.004": [{"id": "influence", "unit": [{"value": "6.04960747641e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.6734049382", "key": "score"}]}]} -{"10.1002/bjs.1800830229": [{"id": "influence", "unit": [{"value": "1.55959196631e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.641264975971", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.005": [{"id": "influence", "unit": [{"value": "1.40353242616e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.002": [{"id": "influence", "unit": [{"value": "1.71408534078e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.2718196736", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.008": [{"id": "influence", "unit": [{"value": "1.63434633767e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.33923656158", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.005": [{"id": "influence", "unit": [{"value": "1.98027269671e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.610709669315", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.009": [{"id": "influence", "unit": [{"value": "1.42215367623e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.006": [{"id": "influence", "unit": [{"value": "1.66410516017e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.429584309322", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.010": [{"id": "influence", "unit": [{"value": "1.46447099277e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.600073123169", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.003": [{"id": "influence", "unit": [{"value": "1.67426183375e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3258493952", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.011": [{"id": "influence", "unit": [{"value": "1.56363960352e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.1056", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.004": [{"id": "influence", "unit": [{"value": "3.59749667999e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.23073974272", "key": "score"}]}]} -{"10.1002/bjs.1800770823": [{"id": "influence", "unit": [{"value": "6.6892359492e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.53423462924", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.012": [{"id": "influence", "unit": [{"value": "1.88010720036e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.71459656158", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.007": [{"id": "influence", "unit": [{"value": "1.96574804476e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.9119619865", "key": "score"}]}]} -{"10.1016/j.bspc.2015.10.002": [{"id": "influence", "unit": [{"value": "1.44345474828e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2015.10.003": [{"id": "influence", "unit": [{"value": "1.57853853228e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.936036561584", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.005": [{"id": "influence", "unit": [{"value": "1.5461465729e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7057536", "key": "score"}]}]} -{"10.1016/j.bspc.2015.10.007": [{"id": "influence", "unit": [{"value": "1.62379020748e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.04", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.008": [{"id": "influence", "unit": [{"value": "1.59401878593e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.980407076684", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.006": [{"id": "influence", "unit": [{"value": "4.46655442013e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.70952494013", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.005": [{"id": "influence", "unit": [{"value": "2.23161161778e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.0879872", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.009": [{"id": "influence", "unit": [{"value": "2.1096199285e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.48547788882", "key": "score"}]}]} -{"10.1002/bjs.1800830230": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.008": [{"id": "influence", "unit": [{"value": "1.46111297696e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.078656", "key": "score"}]}]} -{"10.1016/j.bulm.2004.04.001": [{"id": "influence", "unit": [{"value": "3.00706733095e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.46231057538", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.006": [{"id": "influence", "unit": [{"value": "1.6439374667e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3439026176", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.009": [{"id": "influence", "unit": [{"value": "2.69719564856e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.0302952489", "key": "score"}]}]} -{"10.1016/j.bulm.2004.04.002": [{"id": "influence", "unit": [{"value": "1.52911014415e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0349070606991", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.007": [{"id": "influence", "unit": [{"value": "1.85263367415e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.8166528", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.010": [{"id": "influence", "unit": [{"value": "1.45469401165e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0228427776", "key": "score"}]}]} -{"10.1016/j.bulm.2004.04.003": [{"id": "influence", "unit": [{"value": "1.89493519627e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.24009994797", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.008": [{"id": "influence", "unit": [{"value": "2.59517459944e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.915704832", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.011": [{"id": "influence", "unit": [{"value": "1.86046459817e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.727650816", "key": "score"}]}]} -{"10.1002/bjs.1800800844": [{"id": "influence", "unit": [{"value": "1.57047800382e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.000479472211999", "key": "score"}]}]} -{"10.1016/j.bulm.2004.04.004": [{"id": "influence", "unit": [{"value": "2.60177802378e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.636079226881", "key": "score"}]}]} -{"10.1016/j.bspc.2015.10.008": [{"id": "influence", "unit": [{"value": "1.90122612426e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.2208", "key": "score"}]}]} -{"10.1016/j.bspc.2015.10.011": [{"id": "influence", "unit": [{"value": "2.08997502877e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.17767312317", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.012": [{"id": "influence", "unit": [{"value": "2.05274022656e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.29705189376", "key": "score"}]}]} -{"10.1016/j.bspc.2015.10.012": [{"id": "influence", "unit": [{"value": "1.59881289625e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.992", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.009": [{"id": "influence", "unit": [{"value": "2.03099988159e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.38091049917", "key": "score"}]}]} -{"10.1016/j.bulm.2004.04.005": [{"id": "influence", "unit": [{"value": "1.43984594063e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.036216999936", "key": "score"}]}]} -{"10.1016/j.bspc.2015.10.013": [{"id": "influence", "unit": [{"value": "1.43047895994e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.432", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.013": [{"id": "influence", "unit": [{"value": "1.67511747031e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.74549811678", "key": "score"}]}]} -{"10.1016/j.bspc.2015.11.001": [{"id": "influence", "unit": [{"value": "1.58287888635e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1296", "key": "score"}]}]} -{"10.1002/bjs.1800770824": [{"id": "influence", "unit": [{"value": "1.74510094006e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.000532336899516", "key": "score"}]}]} -{"10.1016/j.bspc.2015.11.004": [{"id": "influence", "unit": [{"value": "1.8157272332e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.4112", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.014": [{"id": "influence", "unit": [{"value": "1.97688539752e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.58003306974", "key": "score"}]}]} -{"10.1016/j.bspc.2015.11.010": [{"id": "influence", "unit": [{"value": "1.62301306315e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.08803656158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.11.011": [{"id": "influence", "unit": [{"value": "1.53599734551e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.5248", "key": "score"}]}]} -{"10.1016/j.bspc.2015.11.012": [{"id": "influence", "unit": [{"value": "1.46274993269e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.360036561584", "key": "score"}]}]} -{"10.1016/j.bulm.2004.04.006": [{"id": "influence", "unit": [{"value": "1.77932635596e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.01969180672", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.015": [{"id": "influence", "unit": [{"value": "4.42373196686e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.2695436288", "key": "score"}]}]} -{"10.1016/j.bspc.2015.11.013": [{"id": "influence", "unit": [{"value": "1.5216135019e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.705636561584", "key": "score"}]}]} -{"10.1016/j.bspc.2015.12.001": [{"id": "influence", "unit": [{"value": "1.51955420807e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.2848", "key": "score"}]}]} -{"10.1016/j.cad.2008.02.002": [{"id": "influence", "unit": [{"value": "2.05886786946e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.2938775552", "key": "score"}]}]} -{"10.1002/bjs.1800830231": [{"id": "influence", "unit": [{"value": "1.69616413352e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.682679489185", "key": "score"}]}]} -{"10.1016/j.bulm.2004.05.004": [{"id": "influence", "unit": [{"value": "1.52306827774e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00129038820994", "key": "score"}]}]} -{"10.1016/j.bspc.2015.12.004": [{"id": "influence", "unit": [{"value": "1.72175768323e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.04963656158", "key": "score"}]}]} -{"10.1016/j.cad.2008.02.003": [{"id": "influence", "unit": [{"value": "2.00163763167e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.95973175296", "key": "score"}]}]} -{"10.1016/j.bulm.2004.05.005": [{"id": "influence", "unit": [{"value": "1.96690672988e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.743240150286", "key": "score"}]}]} -{"10.1016/j.bspc.2015.12.005": [{"id": "influence", "unit": [{"value": "1.56332373874e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.616", "key": "score"}]}]} -{"10.1016/j.cad.2008.02.004": [{"id": "influence", "unit": [{"value": "2.68914111801e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.9750194176", "key": "score"}]}]} -{"10.1016/j.bspc.2015.12.007": [{"id": "influence", "unit": [{"value": "1.61462321853e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.648", "key": "score"}]}]} -{"10.1016/j.bulm.2004.05.006": [{"id": "influence", "unit": [{"value": "1.43605288287e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0101142575844", "key": "score"}]}]} -{"10.1016/j.bspc.2015.12.009": [{"id": "influence", "unit": [{"value": "1.49796866189e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.cad.2008.02.005": [{"id": "influence", "unit": [{"value": "1.42085717449e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.01679616", "key": "score"}]}]} -{"10.1002/bjs.1800770825": [{"id": "influence", "unit": [{"value": "1.72515289261e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.423529266594", "key": "score"}]}]} -{"10.1016/j.bspc.2016.01.001": [{"id": "influence", "unit": [{"value": "1.73560539135e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7808", "key": "score"}]}]} -{"10.1016/j.bulm.2004.05.008": [{"id": "influence", "unit": [{"value": "1.44769579767e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0101508191688", "key": "score"}]}]} -{"10.1016/j.cad.2008.02.006": [{"id": "influence", "unit": [{"value": "3.09614821228e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.2833864704", "key": "score"}]}]} -{"10.1016/j.bspc.2016.01.002": [{"id": "influence", "unit": [{"value": "1.40768568577e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.cad.2008.02.007": [{"id": "influence", "unit": [{"value": "1.90393509315e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.05185556958", "key": "score"}]}]} -{"10.1016/j.bulm.2004.06.001": [{"id": "influence", "unit": [{"value": "1.54298421438e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.054879399936", "key": "score"}]}]} -{"10.1016/j.bspc.2016.01.004": [{"id": "influence", "unit": [{"value": "1.46115744106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7056", "key": "score"}]}]} -{"10.1016/j.cad.2008.02.008": [{"id": "influence", "unit": [{"value": "2.09592527715e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.555173012144", "key": "score"}]}]} -{"10.1016/j.bulm.2004.06.003": [{"id": "influence", "unit": [{"value": "1.57772239494e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0383067109786", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.010": [{"id": "influence", "unit": [{"value": "2.04492895381e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.07702548958", "key": "score"}]}]} -{"10.1002/bjs.1800800845": [{"id": "influence", "unit": [{"value": "2.05111097877e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.748931806934", "key": "score"}]}]} -{"10.1016/j.bspc.2016.01.005": [{"id": "influence", "unit": [{"value": "1.5181247267e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7296", "key": "score"}]}]} -{"10.1016/j.cad.2008.03.001": [{"id": "influence", "unit": [{"value": "2.22361376969e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.20860082176", "key": "score"}]}]} -{"10.1016/j.bspc.2016.01.010": [{"id": "influence", "unit": [{"value": "2.15762004908e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1616", "key": "score"}]}]} -{"10.1016/j.bulm.2004.06.004": [{"id": "influence", "unit": [{"value": "2.45299305703e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.34412580667", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.011": [{"id": "influence", "unit": [{"value": "1.66637169233e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.071177216", "key": "score"}]}]} -{"10.1016/j.cad.2008.03.002": [{"id": "influence", "unit": [{"value": "2.12247153179e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.91892109312", "key": "score"}]}]} -{"10.1016/j.bspc.2016.01.011": [{"id": "influence", "unit": [{"value": "1.70824299336e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.79534624634", "key": "score"}]}]} -{"10.1016/j.bulm.2004.06.005": [{"id": "influence", "unit": [{"value": "4.73383952013e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.6509988749", "key": "score"}]}]} -{"10.1016/j.cad.2008.03.003": [{"id": "influence", "unit": [{"value": "1.81700367369e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.516087296", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.012": [{"id": "influence", "unit": [{"value": "1.49995724598e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.450233856", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.196": [{"id": "influence", "unit": [{"value": "1.41421399515e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.046656", "key": "score"}]}]} -{"10.1002/bjs.1800830232": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.bspc.2016.01.013": [{"id": "influence", "unit": [{"value": "1.4427952674e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216036561584", "key": "score"}]}]} -{"10.1016/j.cad.2008.05.001": [{"id": "influence", "unit": [{"value": "1.47539204854e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0527026176", "key": "score"}]}]} -{"10.1016/j.bulm.2004.06.006": [{"id": "influence", "unit": [{"value": "2.97311157113e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.5246956352", "key": "score"}]}]} -{"10.1016/j.bspc.2016.02.003": [{"id": "influence", "unit": [{"value": "1.40070897441e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.013": [{"id": "influence", "unit": [{"value": "2.4911500399e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "14.9569268832", "key": "score"}]}]} -{"10.1016/j.bspc.2016.02.006": [{"id": "influence", "unit": [{"value": "1.71702229619e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.1216", "key": "score"}]}]} -{"10.1016/j.cad.2008.05.004": [{"id": "influence", "unit": [{"value": "3.05797070316e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.96109521374", "key": "score"}]}]} -{"10.1016/j.bulm.2004.06.007": [{"id": "influence", "unit": [{"value": "2.09569501031e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.69190753748", "key": "score"}]}]} -{"10.1016/j.bspc.2016.02.007": [{"id": "influence", "unit": [{"value": "2.18615295637e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.60487312317", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.017": [{"id": "influence", "unit": [{"value": "1.5747753576e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.43566336", "key": "score"}]}]} -{"10.1002/bjs.1800770826": [{"id": "influence", "unit": [{"value": "1.42464622401e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.02349036908e-06", "key": "score"}]}]} -{"10.1016/j.cad.2008.05.005": [{"id": "influence", "unit": [{"value": "1.8512784976e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.2022628352", "key": "score"}]}]} -{"10.1016/j.bspc.2016.02.008": [{"id": "influence", "unit": [{"value": "1.90646183065e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.616", "key": "score"}]}]} -{"10.1016/j.bulm.2004.07.001": [{"id": "influence", "unit": [{"value": "1.65178100351e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.018756889674", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.018": [{"id": "influence", "unit": [{"value": "2.10323641562e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.426622976", "key": "score"}]}]} -{"10.1016/j.bspc.2016.02.009": [{"id": "influence", "unit": [{"value": "1.58797981286e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.41123656158", "key": "score"}]}]} -{"10.1016/j.cad.2008.05.006": [{"id": "influence", "unit": [{"value": "2.31648466547e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7242642432", "key": "score"}]}]} -{"10.1016/j.bulm.2004.07.004": [{"id": "influence", "unit": [{"value": "2.48984766568e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.88448775304", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.019": [{"id": "influence", "unit": [{"value": "1.52267206004e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.6039936", "key": "score"}]}]} -{"10.1016/j.bspc.2016.03.001": [{"id": "influence", "unit": [{"value": "1.6956111114e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.23363656158", "key": "score"}]}]} -{"10.1016/j.cad.2008.05.007": [{"id": "influence", "unit": [{"value": "1.74859637039e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.689467392", "key": "score"}]}]} -{"10.1002/bjs.1800830233": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.020": [{"id": "influence", "unit": [{"value": "2.19142078922e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.73152", "key": "score"}]}]} -{"10.1016/j.bspc.2016.03.002": [{"id": "influence", "unit": [{"value": "1.49353114625e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.4592", "key": "score"}]}]} -{"10.1016/j.bulm.2004.07.005": [{"id": "influence", "unit": [{"value": "2.00631619007e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.45052106975", "key": "score"}]}]} -{"10.1016/j.cad.2008.06.002": [{"id": "influence", "unit": [{"value": "1.80280869063e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.33571041758", "key": "score"}]}]} -{"10.1016/j.bspc.2016.04.001": [{"id": "influence", "unit": [{"value": "1.47432609805e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.bulm.2004.07.006": [{"id": "influence", "unit": [{"value": "1.64580561258e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.926956302336", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.001": [{"id": "influence", "unit": [{"value": "1.9386600371e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.392983761584", "key": "score"}]}]} -{"10.1016/j.bspc.2016.04.002": [{"id": "influence", "unit": [{"value": "1.77732240919e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.bulm.2004.08.001": [{"id": "influence", "unit": [{"value": "3.65276112913e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.34267874689", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.002": [{"id": "influence", "unit": [{"value": "1.67228822033e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.059357696", "key": "score"}]}]} -{"10.1002/bjs.1800800846": [{"id": "influence", "unit": [{"value": "1.87426454575e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.000824941606699", "key": "score"}]}]} -{"10.1016/j.cad.2008.06.003": [{"id": "influence", "unit": [{"value": "3.08990827031e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.1503314944", "key": "score"}]}]} -{"10.1016/j.bspc.2016.04.004": [{"id": "influence", "unit": [{"value": "1.55323901681e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.89607312317", "key": "score"}]}]} -{"10.1016/j.cad.2008.06.004": [{"id": "influence", "unit": [{"value": "3.21801438375e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.8529477632", "key": "score"}]}]} -{"10.1016/j.bulm.2004.08.003": [{"id": "influence", "unit": [{"value": "1.59273285306e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.175314287002", "key": "score"}]}]} -{"10.1016/j.bspc.2016.05.001": [{"id": "influence", "unit": [{"value": "1.4242561143e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.432", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.003": [{"id": "influence", "unit": [{"value": "2.43015778095e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.805398016", "key": "score"}]}]} -{"10.1016/j.bspc.2016.05.002": [{"id": "influence", "unit": [{"value": "1.76239322363e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.544", "key": "score"}]}]} -{"10.1016/j.bulm.2004.08.004": [{"id": "influence", "unit": [{"value": "1.54566569959e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.33416696732", "key": "score"}]}]} -{"10.1016/j.cad.2008.06.005": [{"id": "influence", "unit": [{"value": "2.31299690805e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.32781914112", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.004": [{"id": "influence", "unit": [{"value": "2.17857956905e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.86970993118", "key": "score"}]}]} -{"10.1002/bjs.1800830234": [{"id": "influence", "unit": [{"value": "1.93299516968e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.410114635488", "key": "score"}]}]} -{"10.1016/j.bspc.2016.05.003": [{"id": "influence", "unit": [{"value": "1.53173355172e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.416", "key": "score"}]}]} -{"10.1016/j.cad.2008.06.006": [{"id": "influence", "unit": [{"value": "2.99072062287e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.99053536189", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.005": [{"id": "influence", "unit": [{"value": "2.06889594575e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.256080896", "key": "score"}]}]} -{"10.1016/j.bspc.2016.05.004": [{"id": "influence", "unit": [{"value": "2.36226956363e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "12.4432", "key": "score"}]}]} -{"10.1016/j.cad.2008.06.007": [{"id": "influence", "unit": [{"value": "3.14923020973e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.691949568", "key": "score"}]}]} -{"10.1016/j.bspc.2016.05.005": [{"id": "influence", "unit": [{"value": "2.03490674279e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.76807312317", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.006": [{"id": "influence", "unit": [{"value": "1.97919772747e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.8754523136", "key": "score"}]}]} -{"10.1016/j.cad.2008.06.008": [{"id": "influence", "unit": [{"value": "2.25742850662e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.10430813595", "key": "score"}]}]} -{"10.1016/j.bspc.2016.05.006": [{"id": "influence", "unit": [{"value": "1.78325655416e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.24007312317", "key": "score"}]}]} -{"10.1016/j.cad.2008.07.001": [{"id": "influence", "unit": [{"value": "1.94166917577e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.134853739184", "key": "score"}]}]} -{"10.1002/bjs.1800770827": [{"id": "influence", "unit": [{"value": "1.76320623916e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.254133214525", "key": "score"}]}]} -{"10.1016/j.bspc.2016.05.009": [{"id": "influence", "unit": [{"value": "1.7698892709e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.136", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.007": [{"id": "influence", "unit": [{"value": "2.01560366604e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.606272", "key": "score"}]}]} -{"10.1016/j.cad.2008.07.002": [{"id": "influence", "unit": [{"value": "3.17149463925e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.09275075038", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.001": [{"id": "influence", "unit": [{"value": "1.53072410751e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.896", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.008": [{"id": "influence", "unit": [{"value": "1.4541483793e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0060466176", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.003": [{"id": "influence", "unit": [{"value": "1.40933402967e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.cad.2008.07.003": [{"id": "influence", "unit": [{"value": "2.9979841587e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.926787936944", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.004": [{"id": "influence", "unit": [{"value": "1.40645973e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.009": [{"id": "influence", "unit": [{"value": "1.7132748437e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.362432721584", "key": "score"}]}]} -{"10.1016/j.cad.2008.07.004": [{"id": "influence", "unit": [{"value": "2.50164812326e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.88192223232", "key": "score"}]}]} -{"10.1002/bjs.1800800847": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.005": [{"id": "influence", "unit": [{"value": "1.74665165591e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.4816", "key": "score"}]}]} -{"10.1016/j.bulm.2004.08.005": [{"id": "influence", "unit": [{"value": "2.45696962307e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.99574209203", "key": "score"}]}]} -{"10.1016/j.cad.2008.07.005": [{"id": "influence", "unit": [{"value": "1.59559459456e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.50693363712", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.010": [{"id": "influence", "unit": [{"value": "1.48583201429e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.056733696", "key": "score"}]}]} -{"10.1016/j.bulm.2004.08.006": [{"id": "influence", "unit": [{"value": "1.52398390667e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.156473856", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.006": [{"id": "influence", "unit": [{"value": "1.4411957679e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816", "key": "score"}]}]} -{"10.1016/j.cad.2008.07.006": [{"id": "influence", "unit": [{"value": "2.02062687478e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.43986364416", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.007": [{"id": "influence", "unit": [{"value": "1.4074035125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.011": [{"id": "influence", "unit": [{"value": "1.85417577652e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1984587776", "key": "score"}]}]} -{"10.1016/j.bulm.2004.08.009": [{"id": "influence", "unit": [{"value": "1.75006832271e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.833871622963", "key": "score"}]}]} -{"10.1002/bjs.1800830235": [{"id": "influence", "unit": [{"value": "4.57466352678e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.34267976794", "key": "score"}]}]} -{"10.1016/j.cad.2008.07.007": [{"id": "influence", "unit": [{"value": "1.6746709597e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.207396561584", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.010": [{"id": "influence", "unit": [{"value": "1.5121257956e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816036561584", "key": "score"}]}]} -{"10.1016/j.bulm.2004.08.010": [{"id": "influence", "unit": [{"value": "1.80911809391e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.00599210096", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.012": [{"id": "influence", "unit": [{"value": "1.69114717345e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.291776", "key": "score"}]}]} -{"10.1016/j.cad.2008.08.001": [{"id": "influence", "unit": [{"value": "2.10035086605e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.2657166336", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.012": [{"id": "influence", "unit": [{"value": "1.69779333017e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.72", "key": "score"}]}]} -{"10.1016/j.bulm.2004.09.002": [{"id": "influence", "unit": [{"value": "1.78162325028e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1503188566", "key": "score"}]}]} -{"10.1016/j.cad.2008.08.003": [{"id": "influence", "unit": [{"value": "1.65953020168e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.5175936", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.013": [{"id": "influence", "unit": [{"value": "1.65255327378e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.88007312317", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.014": [{"id": "influence", "unit": [{"value": "1.4134548894e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1002/bjs.1800770828": [{"id": "influence", "unit": [{"value": "1.86514259401e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.833593408536", "key": "score"}]}]} -{"10.1016/j.bulm.2004.09.003": [{"id": "influence", "unit": [{"value": "1.97562905655e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.55319395228", "key": "score"}]}]} -{"10.1016/j.cad.2008.08.004": [{"id": "influence", "unit": [{"value": "2.15409428027e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.509453312", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.015": [{"id": "influence", "unit": [{"value": "1.57786086387e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.92963656158", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.016": [{"id": "influence", "unit": [{"value": "1.50216812068e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.4496", "key": "score"}]}]} -{"10.1016/j.bulm.2004.09.004": [{"id": "influence", "unit": [{"value": "1.43075299138e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00683025924096", "key": "score"}]}]} -{"10.1016/j.cad.2008.08.005": [{"id": "influence", "unit": [{"value": "1.91398146851e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0908104751844", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.017": [{"id": "influence", "unit": [{"value": "1.43750163699e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bulm.2004.09.005": [{"id": "influence", "unit": [{"value": "1.85585291498e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.456078732756", "key": "score"}]}]} -{"10.1016/j.cad.2008.08.006": [{"id": "influence", "unit": [{"value": "2.51055931908e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.33129063357", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.018": [{"id": "influence", "unit": [{"value": "1.56828824848e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.49603656158", "key": "score"}]}]} -{"10.1002/bjs.1800830236": [{"id": "influence", "unit": [{"value": "2.10974073898e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0634013861571", "key": "score"}]}]} -{"10.1016/j.cad.2008.08.007": [{"id": "influence", "unit": [{"value": "1.57639934401e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7862962176", "key": "score"}]}]} -{"10.1016/j.bulm.2004.09.006": [{"id": "influence", "unit": [{"value": "1.7374230549e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.2753957615", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.020": [{"id": "influence", "unit": [{"value": "1.84666648997e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.752", "key": "score"}]}]} -{"10.1016/j.cad.2008.08.008": [{"id": "influence", "unit": [{"value": "2.15821065694e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.2964569088", "key": "score"}]}]} -{"10.1016/j.bulm.2004.09.008": [{"id": "influence", "unit": [{"value": "1.56115937004e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0796495967688", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.002": [{"id": "influence", "unit": [{"value": "1.47691612934e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.cad.2008.09.001": [{"id": "influence", "unit": [{"value": "1.45887434945e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1575936", "key": "score"}]}]} -{"10.1016/j.bulm.2004.09.009": [{"id": "influence", "unit": [{"value": "4.57902670991e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.11395509574", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.003": [{"id": "influence", "unit": [{"value": "1.71436278381e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.392", "key": "score"}]}]} -{"10.1016/j.cad.2008.09.002": [{"id": "influence", "unit": [{"value": "1.5539682507e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.2289586176", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.199": [{"id": "influence", "unit": [{"value": "1.71476015775e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.90165576158", "key": "score"}]}]} -{"10.1002/bjs.1800800849": [{"id": "influence", "unit": [{"value": "3.09530027559e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.49082768433", "key": "score"}]}]} -{"10.1016/j.bulm.2004.10.001": [{"id": "influence", "unit": [{"value": "1.55886217247e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.023277059113", "key": "score"}]}]} -{"10.1016/j.cad.2008.09.003": [{"id": "influence", "unit": [{"value": "1.57617414217e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.424633856", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.005": [{"id": "influence", "unit": [{"value": "1.95192226147e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.224", "key": "score"}]}]} -{"10.1016/j.bulm.2004.10.003": [{"id": "influence", "unit": [{"value": "1.92367459178e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.528195031565", "key": "score"}]}]} -{"10.1016/j.cad.2008.09.004": [{"id": "influence", "unit": [{"value": "2.17373591838e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.37798875136", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.006": [{"id": "influence", "unit": [{"value": "1.61114213421e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.312", "key": "score"}]}]} -{"10.1016/j.bulm.2004.10.004": [{"id": "influence", "unit": [{"value": "2.33702357833e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.63762464588", "key": "score"}]}]} -{"10.1016/j.cad.2008.09.005": [{"id": "influence", "unit": [{"value": "2.69563597746e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.2866942976", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.008": [{"id": "influence", "unit": [{"value": "1.73096493179e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.1216", "key": "score"}]}]} -{"10.1016/j.bulm.2004.10.005": [{"id": "influence", "unit": [{"value": "1.50907350045e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.102595742761", "key": "score"}]}]} -{"10.1002/bjs.1800830237": [{"id": "influence", "unit": [{"value": "2.31106043214e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.210221241405", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.010": [{"id": "influence", "unit": [{"value": "1.91545566337e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.072", "key": "score"}]}]} -{"10.1016/j.cad.2008.09.006": [{"id": "influence", "unit": [{"value": "1.59244027261e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.49161517056", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.011": [{"id": "influence", "unit": [{"value": "1.50397644161e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.001": [{"id": "influence", "unit": [{"value": "1.94763030991e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.32440162861", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.014": [{"id": "influence", "unit": [{"value": "1.48124323768e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.432", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.002": [{"id": "influence", "unit": [{"value": "3.00494323511e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.4027693056", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.002": [{"id": "influence", "unit": [{"value": "2.53857687295e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.74619584076", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.015": [{"id": "influence", "unit": [{"value": "1.57036055242e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.632", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.003": [{"id": "influence", "unit": [{"value": "1.80864486244e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.19054592", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.003": [{"id": "influence", "unit": [{"value": "1.64757827667e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.185750234726", "key": "score"}]}]} -{"10.1002/bjs.1800770830": [{"id": "influence", "unit": [{"value": "2.1717988111e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00307941311092", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.002": [{"id": "influence", "unit": [{"value": "1.57522435727e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.53603656158", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.004": [{"id": "influence", "unit": [{"value": "2.20693385362e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.53278259678", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.008": [{"id": "influence", "unit": [{"value": "1.46628397755e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.004": [{"id": "influence", "unit": [{"value": "1.44180245944e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.004353564672", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.005": [{"id": "influence", "unit": [{"value": "4.21350036174e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.3617120256", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.010": [{"id": "influence", "unit": [{"value": "1.57228551928e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.992", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.013": [{"id": "influence", "unit": [{"value": "1.40919153834e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.006": [{"id": "influence", "unit": [{"value": "1.67836187004e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.06976", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.005": [{"id": "influence", "unit": [{"value": "1.6429360297e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0871710356608", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.014": [{"id": "influence", "unit": [{"value": "1.47004185386e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.432073123169", "key": "score"}]}]} -{"10.1002/bjs.1800800850": [{"id": "influence", "unit": [{"value": "1.45655493484e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.47799794434e-05", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.007": [{"id": "influence", "unit": [{"value": "1.45629653212e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1057536", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.006": [{"id": "influence", "unit": [{"value": "2.32289086552e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.45411454376", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.015": [{"id": "influence", "unit": [{"value": "1.44553926616e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.176", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.013": [{"id": "influence", "unit": [{"value": "2.60304743482e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.23790336", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.008": [{"id": "influence", "unit": [{"value": "1.55644587328e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.614071296", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.018": [{"id": "influence", "unit": [{"value": "1.7716798502e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.98403656158", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.009": [{"id": "influence", "unit": [{"value": "1.56852779265e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.064945152", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.019": [{"id": "influence", "unit": [{"value": "1.4051013807e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.014": [{"id": "influence", "unit": [{"value": "4.16520176181e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.18018171358", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.007": [{"id": "influence", "unit": [{"value": "1.42012974149e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.000470184984576", "key": "score"}]}]} -{"10.1002/bjs.1800830238": [{"id": "influence", "unit": [{"value": "2.35702875882e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.961022410653", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.020": [{"id": "influence", "unit": [{"value": "1.81191933705e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.76", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.010": [{"id": "influence", "unit": [{"value": "2.2300724133e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.901495296", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.008": [{"id": "influence", "unit": [{"value": "3.26455792748e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.40855210787", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.001": [{"id": "influence", "unit": [{"value": "1.95265402863e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.289997312", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.023": [{"id": "influence", "unit": [{"value": "1.44610848556e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.011": [{"id": "influence", "unit": [{"value": "1.56438579362e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.633424896", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.024": [{"id": "influence", "unit": [{"value": "1.52455960767e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.416", "key": "score"}]}]} -{"10.1016/j.bulm.2004.12.001": [{"id": "influence", "unit": [{"value": "1.40652400555e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.01679616", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.002": [{"id": "influence", "unit": [{"value": "1.81296293653e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.0816256", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.012": [{"id": "influence", "unit": [{"value": "1.86407798247e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.6482226176", "key": "score"}]}]} -{"10.1002/bjs.1800770831": [{"id": "influence", "unit": [{"value": "2.33499424669e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.224565660204", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.002": [{"id": "influence", "unit": [{"value": "1.48710792066e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.112", "key": "score"}]}]} -{"10.1016/j.bulm.2004.12.002": [{"id": "influence", "unit": [{"value": "1.84385083234e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3555308798", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.003": [{"id": "influence", "unit": [{"value": "1.82138256221e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.79170816", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.014": [{"id": "influence", "unit": [{"value": "3.47023350086e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.33522248158", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.003": [{"id": "influence", "unit": [{"value": "1.7243704389e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.73603656158", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.004": [{"id": "influence", "unit": [{"value": "1.96614244154e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.64679752158", "key": "score"}]}]} -{"10.1016/j.bulm.2004.12.003": [{"id": "influence", "unit": [{"value": "1.84849867761e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.866660424745", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.005": [{"id": "influence", "unit": [{"value": "2.23601105505e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.32003656158", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.015": [{"id": "influence", "unit": [{"value": "2.05692337575e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.12908712414", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.007": [{"id": "influence", "unit": [{"value": "2.41073729092e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.1440365616", "key": "score"}]}]} -{"10.1002/bjs.1800830239": [{"id": "influence", "unit": [{"value": "3.46552953627e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.1368777166", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.005": [{"id": "influence", "unit": [{"value": "4.3579701733e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.64887277434", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.016": [{"id": "influence", "unit": [{"value": "1.82885876549e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.767503872", "key": "score"}]}]} -{"10.1016/j.bulm.2004.12.004": [{"id": "influence", "unit": [{"value": "1.40616171935e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.008223399936", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.008": [{"id": "influence", "unit": [{"value": "2.00650284933e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.25607312317", "key": "score"}]}]} -{"10.1016/j.cad.2008.11.001": [{"id": "influence", "unit": [{"value": "2.63357101584e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.79492096", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.006": [{"id": "influence", "unit": [{"value": "1.47565045883e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.227515392", "key": "score"}]}]} -{"10.1016/j.bulm.2004.12.005": [{"id": "influence", "unit": [{"value": "2.33561886021e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.30555116965", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.009": [{"id": "influence", "unit": [{"value": "1.75755379814e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.688", "key": "score"}]}]} -{"10.1016/j.cad.2008.11.003": [{"id": "influence", "unit": [{"value": "3.1616178764e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.11116001758", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.007": [{"id": "influence", "unit": [{"value": "2.38387637008e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.18710016", "key": "score"}]}]} -{"10.1002/bjs.1800800851": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.bulm.2005.01.001": [{"id": "influence", "unit": [{"value": "1.42647191702e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.03162157056", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.010": [{"id": "influence", "unit": [{"value": "1.63573718026e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.88", "key": "score"}]}]} -{"10.1016/j.cad.2008.11.006": [{"id": "influence", "unit": [{"value": "1.87221293034e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576571392", "key": "score"}]}]} -{"10.1016/j.bulm.2005.01.002": [{"id": "influence", "unit": [{"value": "4.55573629147e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "14.7926573464", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.011": [{"id": "influence", "unit": [{"value": "1.64394416726e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.136", "key": "score"}]}]} -{"10.1016/j.cad.2008.11.007": [{"id": "influence", "unit": [{"value": "1.50436953162e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.134493696", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.008": [{"id": "influence", "unit": [{"value": "1.98956540239e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.282046161584", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.014": [{"id": "influence", "unit": [{"value": "1.53911323715e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576036561584", "key": "score"}]}]} -{"10.1016/j.bulm.2005.01.003": [{"id": "influence", "unit": [{"value": "1.89561182084e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.52232193858", "key": "score"}]}]} -{"10.1016/j.cad.2008.11.008": [{"id": "influence", "unit": [{"value": "1.41417356353e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0279936", "key": "score"}]}]} -{"10.1002/bjs.1800770832": [{"id": "influence", "unit": [{"value": "1.81095076918e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0101951064951", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.009": [{"id": "influence", "unit": [{"value": "2.56761001538e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.050598912", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.015": [{"id": "influence", "unit": [{"value": "1.63966184918e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.136", "key": "score"}]}]} -{"10.1016/j.cad.2008.11.009": [{"id": "influence", "unit": [{"value": "5.09401674186e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.51791044608", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.016": [{"id": "influence", "unit": [{"value": "1.46967592725e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.536", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.010": [{"id": "influence", "unit": [{"value": "2.84315984215e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.649660052144", "key": "score"}]}]} -{"10.1016/j.cad.2008.11.010": [{"id": "influence", "unit": [{"value": "1.45603710668e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1575936", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.017": [{"id": "influence", "unit": [{"value": "1.51152226412e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json deleted file mode 100644 index e5e70cb27..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json +++ /dev/null @@ -1,18 +0,0 @@ -{"author":[{"fullname":"Niskala, Eino","name":"Eino","pid":[],"rank":1,"surname":"Niskala"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.375Z","dateoftransformation":"2020-10-14T13:55:20.918Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::2cfa9f434e854612c7cbdeb43433ac24","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719957520,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-30T11:16:46Z","harvestDate":"2020-10-14T13:09:13.375Z","identifier":"oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Niskala , E 1988 , ' Puutalon ulkovaipan korjaus ' , Kodinrakentaja , no. 3 , pp. 57-60 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puutalon ulkovaipan korjaus"}]} -{"author":[{"fullname":"Ikonen, Kari","name":"Kari","pid":[],"rank":1,"surname":"Ikonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"dateofcollection":"2020-10-14T13:13:18.619Z","dateoftransformation":"2020-10-14T14:38:03.661Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::7bba8d87fe65db9b20219f5d3ed6e7c5","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/aad07a47-f575-4696-9323-826722e44745"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720067542,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-22T07:59:56Z","harvestDate":"2020-10-14T13:13:18.619Z","identifier":"oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ikonen , K 1986 , ' Rakenneanalyysiohjelmat ' , Paper presented at Ydinvoimalaitosten turvallisuustutkimuksessa Suomessa käytetyt tietokoneohjelmat , Lappeenranta , Finland , 1/01/86 - 31/05/86 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/v1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"V1 Non-published/full refereed conference article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Rakenneanalyysiohjelmat"}]} -{"author":[{"fullname":"Home, Silja","name":"Silja","pid":[],"rank":1,"surname":"Home"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"dateofcollection":"2020-10-14T13:09:44.334Z","dateoftransformation":"2020-10-14T14:40:24.929Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::810ab952d864911e203aaa1a6350e297","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720074531,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-24T10:28:45Z","harvestDate":"2020-10-14T13:09:44.334Z","identifier":"oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Home , S 1993 , ' Oluen kemiaa ' , Dimensio , vol. 57 , no. 5 , pp. 10-15 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Oluen kemiaa"}]} -{"author":[{"fullname":"Mattila, Sakari","name":"Sakari","pid":[],"rank":1,"surname":"Mattila"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2020-10-14T13:09:40.962Z","dateoftransformation":"2020-10-14T14:46:53.279Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::8b1f6bc7e8243f4438937be16e76d8d0","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720088014,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-27T09:00:30Z","harvestDate":"2020-10-14T13:09:40.962Z","identifier":"oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mattila , S 1991 , ' Puoliksi avointa ' , Tietotekniikka , vol. 37 , no. 5 , 21 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puoliksi avointa"}]} -{"author":[{"fullname":"Viitaniemi, Pertti","name":"Pertti","pid":[],"rank":1,"surname":"Viitaniemi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.348Z","dateoftransformation":"2020-10-14T16:58:47.202Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f1fa9e5d1ba36533cb0afb54538a6b09","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719769155,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-22T10:46:50Z","harvestDate":"2020-10-14T13:09:13.348Z","identifier":"oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Viitaniemi , P 1988 , ' Puun kierteisyys ja sen vaikutus sahatavaran laatuun ' , Sahamies , no. 9 , pp. 260-264 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puun kierteisyys ja sen vaikutus sahatavaran laatuun"}]} -{"author":[{"fullname":"Varjonen, Suvi","name":"Suvi","pid":[],"rank":1,"surname":"Varjonen"},{"fullname":"Laaksonen, Päivi","name":"Päivi","pid":[],"rank":2,"surname":"Laaksonen"},{"fullname":"Paananen, Arja","name":"Arja","pid":[],"rank":3,"surname":"Paananen"},{"fullname":"Valo, Hanna","name":"Hanna","pid":[],"rank":4,"surname":"Valo"},{"fullname":"Hähl, Hendrik","name":"Hendrik","pid":[],"rank":5,"surname":"Hähl"},{"fullname":"Laaksonen, Timo","name":"Timo","pid":[],"rank":6,"surname":"Laaksonen"},{"fullname":"Linder, Markus","name":"Markus","pid":[],"rank":7,"surname":"Linder"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"dateofcollection":"2020-10-14T13:00:59.594Z","dateoftransformation":"2020-10-14T17:14:32.702Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"One central problem for the function and manufacture of materials where performance relies on nanoscale structure is to control the compatibility and interactions of the building blocks. In natural materials, such as nacre, there are examples of multifunctional macromolecules that have combined binding affinities for different materials within the same molecule, thereby bridging these materials and acting as a molecular glue. Here, we describe the use of a designed multifunctional protein that is used for self-assembly of nanofibrillar cellulose. Recent advances in the production of cellulose nanofibrils have given inspiration for new uses of cellulosic materials. Cellulose nanofibrils have mechanical and structural features that open new possibilities for performance in composites and other nanoscale materials. Functionalisation was realised through a bi-functional fusion protein having both an ability to bind to cellulose and a second functionality of surface activity. The cellulose-binding function was obtained using cellulose-binding domains from cellulolytic enzymes and the surface activity through the use of a surface active protein called hydrophobin. Using the bi-functional protein, cellulose nanofibrils could be assembled into tightly packed thin films at the air/water interface and at the oil/water interface. It was shown that the combination of protein and cellulose nanofibrils resulted in a synergistic improvement in the formation and stability of oil-in-water emulsions resulting in emulsions that were stable for several months. The bi-functionality of the protein also allowed the binding of hydrophobic solid drug nanoparticles to cellulose nanofibrils and thereby improving their long-term stability under physiological conditions."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5521b424-20a0-4f8c-8c70-505af50c5fef","https://doi.org/10.1039/C0SM01114B"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719787721,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-24T01:09:04Z","harvestDate":"2020-10-14T13:00:59.594Z","identifier":"oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.buildenv.2010.01.006"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Varjonen , S , Laaksonen , P , Paananen , A , Valo , H , Hähl , H , Laaksonen , T & Linder , M 2011 , ' Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins ' , Soft Matter , vol. 7 , no. 6 , pp. 2402-2411 . https://doi.org/10.1039/C0SM01114B"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/a1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"A1 Refereed journal article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins"}]} -{"author":[{"fullname":"Macharia, Bodia","name":"Bodia","pid":[],"rank":1,"surname":"Macharia"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Crosta, Suzanne"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"French"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"dateofcollection":"2020-09-30T05:06:26.491Z","dateoftransformation":"2020-10-09T05:07:41.329Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

Ce qui nous frappe en premier lieu dans le roman de Melchior Mbonimpa, Le Totem des Baranda, c'est l'efficacité de sa fonction didactique. Le livre est porteur d'un message, d'une vision sur l'histoire de l'Afrique des Grands Lacs qu'on rencontre rarement dans l'actualité ou l'histoire immédiate que nous livrent les médias.

Cette thèse se penchera sur un aspect de ce roman qui a particulièrement attiré notre attention: la représentation de la résistance à travers l'affirmation identitaire des personnages féminins. Dans notre analyse de ce thème, nous accordons une grande importance au contexte socioculturel et à l'imaginaire collectif qui nourrissent l'auteur et dans lesquels s'inscrivent ses personnages. L'étude soulignera la fonction sociale de l'écrivain et relèvera la contribution de la fiction à la culture africaine. Nous mettrons en évidence les positions idéologiques de l'auteur et le message de sensibilisation qu'il adresse à diverses catégories de lecteurs: ceux qui se trouvent en Afrique, les Africains de la diaspora, et les lecteurs non-Africains qui s'intéressent à son œuvre. Cette orientation idéologique affecte évidemment la manière dont s'exprime la résistance des pnncIpaux personnages féminins du roman. Elle détermine la place que cette fiction accorde à la femme, et qui correspond sans doute à la place que, selon les souhaits de l'auteur, la femme devrait occuper dans le contexte culturel africain. Notre étude insiste aussi sur le fait que dans ce roman, la modernité se nourrit de la culture traditionnelle et ne renie pas les valeurs du passé.

Ainsi, le parcours narratif montre que dans leur résistance, les héroïnes de la généalogie vivent à leur époque, mais chacune porte plus loin une mission transmise par les précédentes.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::4ebf4c756904fa8a37615e44b1200332","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/10605"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720429600,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:27Z","harvestDate":"2020-09-30T05:06:26.491Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/10605","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/10605"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Pour une renaissance généalogique: résistance féminine chez Melchior Mbonimpa"}]} -{"author":[{"fullname":"Krause, Walter Thomas","name":"Walter Thomas","pid":[],"rank":1,"surname":"Krause"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Datars, W.R."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Physics"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"dateofcollection":"2020-09-30T05:06:17.843Z","dateoftransformation":"2020-10-09T05:07:43.404Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

The superconducting transition in the magnetic resistance of members of the family of bismuth based high temperature superconductors was investigated. Measurements were performed in magnetic fields up to 1.7 T. Small current densities ranging from 0.03 A/cm² to 3.0 A/cm² were applied. The resistivity of Bi₂Sr₂CaCu₂Ox single crystals was analyzed in terms of the thermally activated flux flow expression, ρ =ρ₀U/T exp(-U/T) where T is the temperature. It was found that the activation energy was given by Uα(Hsinθ)^(⁻α) where α≈1/3 and that the prefactor had the form, ρ₀Hsinθ, where H was the applied field and θ the angle of the field with respect to the CuO₂ planes. Results demonstrated that dissipation could be accounted for by the motion of two-dimensional vortices whose density is given by the field, Hsinθ, projected on the CuO₂ planes. Measurements of the resistivity and current dependent resistivity were performed with two Sn-doped and two Sb-doped polycrystalline Bi(1.7)Pb(0.3)Sr₂Ca₂Cu₃O(y) samples. Features in the temperature derivative of the resistivity curves were associated with the presence of a superconducting transition between superconducting grains, coupled by weak links with a distribution of critical currents and critical temperatures, and the superconducting transition within grains. The transition between grains was more strongly suppressed in temperature with the application of a magnetic field in samples with weaker coupling between grains. The presence of a transition in a magnetic field due to weak links between grains was verified at 77 K by the observation of a current dependent resistivity in a magnetic field. Measurements of a Bi₂Sr₂CaCu₂Ox diffusion grown thick film ring were done. The transverse voltage, the voltage at the centre of a 120 μm thick branch with respect to the centre of a 76 μm thick branch, was measured. A higher critical temperature from the presence of more texturing in the 76 μm branch as determined by separate resistivity, x-ray and scanning electron microscopy measurements was consistent with the measurement of a crossover from a negative to positive transverse voltage as the temperature of the sample went through its superconducting transition.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Doctor of Philosophy (PhD)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::57e9c0e7f2803e74fef30e18bab5e450","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/8621"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720431464,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:31:35Z","harvestDate":"2020-09-30T05:06:17.843Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/8621","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/8621"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Magnetic field resistivity of superconducting bismuth oxides"}]} -{"author":[{"fullname":"Sharp, Jeremy","name":"Jeremy","pid":[],"rank":1,"surname":"Sharp"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Savage, A."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"English"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"dateofcollection":"2020-09-30T05:06:27.768Z","dateoftransformation":"2020-10-09T05:07:56.297Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

This study examines Leonard Cohen's novel Beautiful Losers through the lenses of allegorical and authorial theories to appreciate how the novel uses allegorical techniques to code into symbolic terms an exploration of the polysemous nature of the word ''translation.'' The first chapter studies the stylistic and conceptual dimensions of allegory as a literary genre - as critics like Northrop Frye, Angus Fletchet, and Maureen Quilligan help to define it - while arguing that Cohen's novel is consciously allegorical, challenging readers to interpret what it \"means,\" or may mean. The second chapter performs an intensive re-reading of Beautiful Losers, examining how the novel uses complex systems of verbal play (particularly puns) to coordinate a reunification of various dichotomies historical \"reality\"/imaginative myth, secularity/spirituality, enslavement/sanctification, among others - employed throughout the text. The thesis concludes that the novel is perpetually playing with various types of translation (spiritual, linguistic, physical, and so forth), affirming the need for emotionally-charged, devotional forms of expression (like song and prayer) over more clinical attempts to reorder or recreate the world and its inhabitants. Ultimately, this discussion argues that an understanding of the allegorical dimensions of Beautiful Losers may illuminate how Cohen's other works (particularly his songs) may be studied as attempts to associate word with voice, to emphasize the process of expression (translation) rather than just the finished product.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::8ab8cb6d096b31eb67b4aaf43ca2d75f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/11059"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720442126,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:51Z","harvestDate":"2020-09-30T05:06:27.768Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/11059","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/11059"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"''What'd I Say?\": Beautiful Losers' Allegory of Translation"}]} -{"author":[{"fullname":"Hetemäki, Ilari","name":"Ilari","pid":[],"rank":1,"surname":"Hetemäki"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:44.425Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/1850"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"3","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":"27"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720425037,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-08-27T12:42:47.579Z","identifier":"oai:journal.fi:article/1850","metadataNamespace":""}},"originalId":["oai:journal.fi:article/1850"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Lyhyesti"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Lyhyesti"}]} -{"author":[{"fullname":"Kulonen[-Korhonen], Ulla[-Maija]","pid":[],"rank":1}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:51.844Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvio Abondolo, Daniel Mario: Hungarian inflectional morphology Kielenainekset etuprosodinen (kieli: suomi, sivulla: 254) juuri (kieli: suomi, sivulla: 254) koodi (kieli: suomi, sivulla: 254) subjektikonjugaatio (kieli: suomi, sivulla: 255) takaprosodinen (kieli: suomi, sivulla: 254)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::4c60c95783c4b240747e52990e709573","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0015","classname":"Review","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/virittaja/article/view/38371"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"253","iss":"2","issnLinking":"","issnOnline":"2242-8828","issnPrinted":"0042-6806","name":"Virittäjä","sp":"253","vol":"94"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720430784,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:05Z","harvestDate":"2020-08-27T14:00:01.261Z","identifier":"oai:journal.fi:article/38371","metadataNamespace":""}},"originalId":["oai:journal.fi:article/38371"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kotikielen Seura"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Virittäjä"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuutta"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Tuore näkemys unkarin taivutusmorfologiasta"}]} -{"author":[{"fullname":"Kerppola-Pesu, Jenni","name":"Jenni","pid":[],"rank":1,"surname":"Kerppola-Pesu"},{"fullname":"Halme, Nina","name":"Nina","pid":[],"rank":2,"surname":"Halme"},{"fullname":"Pietilä, Anna-Maija","name":"Anna-Maija","pid":[],"rank":3,"surname":"Pietilä"},{"fullname":"Perälä, Marja-Leena","name":"Marja-Leena","pid":[],"rank":4,"surname":"Perälä"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:55.86Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vanhempien osallisuuden vahvistaminen sekä oikeus tulla kuulluksi ovat keskeisiä lasten palveluja ohjaavia periaatteita. Osallisuuden toteutumisessa on kuitenkin edelleen puutteita. Tämän tutkimuksen tarkoituksena oli selvittää päihdepalvelujen esimiesten käsityksiä siitä, miten päihdepalvelujen piirissä olevien vanhempien osallisuutta tuetaan. Osallisuuden tukemista arvioitiin työntekijöille modifiodulla Family Empowerment Scale -mittarilla (FES). Aineisto kerättiin päihdepalveluissa toimivilta esimiehiltä (n=372). Vastausprosentti oli 36. Taustamuuttujien perusteella määräytyvien vastaajaryhmien väliset erot analysoitiin riippumattomien otosten t-testillä sekä yksisuuntaisella varianssianalyysillä. Vanhempien osallisuuden tukeminen toteutui kohtuullisesti kaikissa toimipisteissä. Merkittävimmät kehittämiskohteet liittyivät perheiden riittämättömään tiedonsaantiin, heikkoihin palautteen antomahdollisuuksin, perheen ja henkilöstön välisen yhteistyön sekä vanhempien yhteiskunnallisten vaikutusmahdollisuuksien lisäämiseen. Vastaajien mukaan toimipisteen luonne oli yhteydessä osallisuuden tukemiseen päihdepalveluissa."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::5115f8bae044b12a72b0741673c66fcb","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/sla/article/view/47238"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"","issnPrinted":"0355-5097","name":"Sosiaalilääketieteellinen Aikakauslehti","sp":"","vol":"51"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720434259,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2015-07-02T10:20:48Z","harvestDate":"2020-08-27T13:08:26.705Z","identifier":"oai:journal.fi:article/47238","metadataNamespace":""}},"originalId":["oai:journal.fi:article/47238"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteen yhdistys ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteellinen Aikakauslehti"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Päihdepalvelujen piirissä olevien vanhempien osallisuuden tukeminen"}]} -{"author":[{"fullname":"Ritari, Katja","name":"Katja","pid":[],"rank":1,"surname":"Ritari"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:21.371Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::72070913a49aa49d3b5abc600f940893","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/scf/article/view/77169"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2242-4261","issnPrinted":"1795-097X","name":"Studia Celtica Fennica","sp":"","vol":"14"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720459568,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2018-12-05T13:07:12Z","harvestDate":"2020-08-27T14:57:55.374Z","identifier":"oai:journal.fi:article/77169","metadataNamespace":""}},"originalId":["oai:journal.fi:article/77169"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Finnish Society for Celtic Studies SFKS ry."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Studia Celtica Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Editorial"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Editorial"}]} -{"author":[{"fullname":"Hoffrén, Jukka","name":"Jukka","pid":[],"rank":1,"surname":"Hoffrén"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:16.036Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Suomalaista hyvinvointiyhteiskuntaa pidettiin pitkään koko kansakuntaa yhdistävänä menestystarinana. Hyvinvoinnin huippukohta saavutettiin 1990-luvun alussa, ja sen jälkeen tarina on saanut entistä enemmän säröjä. Uusien mittareiden mukaan suomalaisten hyvinvointi on polkenut paikallaan tai jopa alentunut, vaikka ruttokansantuotteella (BKT) mitattu talouskasvu onkin saatu jatkumaan voimakkaana. Suurimpia syitä hyvinvoinnin laskuun ovat tuloerojen kasvaminen, talouden ympäristöön kasautuvan kuormituksen kasvu sekä luonnonvarojen kiihtyvä kulutus. Jälkiteolliseen yhteiskuntaan siirtyminen muuttaa tuotanto- ja elämäntapoja sekä rikkoo aiempia uskomuksia perinteisen talouskasvun siunauksellisuudesta yhteiskunnalliselle kehitykselle."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::7709c0dd641ca56ada58c9378e156648","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/5022"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":"30"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720758508,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:32:18.573Z","identifier":"oai:journal.fi:article/5022","metadataNamespace":""}},"originalId":["oai:journal.fi:article/5022"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Suomalaisen hyvinvoinnin tarina"}]} -{"author":[{"fullname":"Siivonen, Katriina","name":"Katriina","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6302-1146"}],"rank":1,"surname":"Siivonen"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:57.958Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Yrsa Lindqvist (ed.) 2008. Tradition och turism på Åland. Att använda kulturarven. (Tradition and Tourism in Åland.) Meddelanden från Folkkultursarkivet 21. Skrifter utgivna av Svenska litteratursällskapet i Finland 711. Helsingfors: Svenska litteratursällskapet i Finland. 240 pp. III. ISBN 978-951-583-167-5."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::a974b7e5144d11e293162c96ff33a4f0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/ethnolfenn/article/view/65995"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"101","iss":"","issnLinking":"","issnOnline":"2489-4982","issnPrinted":"0355-1776","name":"Ethnologia Fennica","sp":"100","vol":"36"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720502596,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2017-09-29T12:07:21Z","harvestDate":"2020-08-27T15:37:26.591Z","identifier":"oai:journal.fi:article/65995","metadataNamespace":""}},"originalId":["oai:journal.fi:article/65995"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnos ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnologia Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Book Reviews"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cultural Heritage in Use in Åland"}]} -{"author":[{"fullname":"Portin, Petter","name":"Petter","pid":[],"rank":1,"surname":"Portin"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:50.546Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvostelu: Worldwatch-instituutti: Maailman tila 2008. Kestävä talous. Raportti kehityksestä kohti kestävää yhteiskuntaa. Suomentanut Jani Kaaro. Gaudeamus 2008."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::ad462fe2a7230b480118e7d8d37476d5","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/490"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":""},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720348067,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:28:00.546Z","identifier":"oai:journal.fi:article/490","metadataNamespace":""}},"originalId":["oai:journal.fi:article/490"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuus"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Ilmastonmuutos – vakava markkinahäiriö"}]} -{"author":[{"fullname":"Klaus Ostermann","pid":[],"rank":1},{"fullname":"Paolo Giarrusso","pid":[],"rank":2},{"fullname":"Christian Kästner","name":"Christian K.","pid":[],"rank":3,"surname":"Stner"},{"fullname":"Tillmann Rendel","pid":[],"rank":4}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"dateofcollection":"2015-01-20T00:00:00Z","dateoftransformation":"2016-03-12T12:49:39.741Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"distributionlocation":"","hostedby":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dl.acm.org/citation.cfm?id=2032509"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720265881,"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":""}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Revisiting information hiding"}]} -{"author":[{"fullname":"Hernandez Lopezomoza, Mario Andres","name":"Mario Andres","pid":[],"rank":1,"surname":"Hernandez Lopezomoza"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Institut Supérieur de l'Aéronautique et de l'Espace"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Biannic, Jean-Marc"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Jouhaud, Frank"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"dateofcollection":"2016-02-26T12:03:21.28Z","dateoftransformation":"2020-08-15T08:01:27.526Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ce travail de thèse est consacré à l'extension de l'Inversion Dynamique non-linéaire (NDI-Nonlinear Dynamic Inversion) pour un ensemble plus grand de systèmes non-linéaires, tout en garantissant des conditions de stabilité suffisantes. La NDI a été étudiée dans le cas de diverses applications, y compris en aéronautique et en aérospatiale. Elle permet de calculer des lois de contrôle capables de linéariser et de découpler un modèle non-linéaire à tout point de fonctionnement de son enveloppe d'état. Cependant cette méthode est intrinsèquement non-robuste aux erreurs de modélisation et aux saturations en entrée. En outre, dans un contexte non-linéaire, l'obtention d'une garantie quantifiable du domaine de stabilité atteint reste à l'heure actuelle complexe. Contrairement aux approches classiques de la NDI, notre méthodologie peut être considérée comme un cadre de compensation non-linéaire généralisé qui permet d'intégrer les incertitudes et les saturations en entrée dans le processus de conception. En utilisant des stratégies de contrôle antiwindup, la loi de pilotage peut être calculée grâce à un simple processus en deux phases. Dans ce cadre de travail généralisé des transformations linéaires fractionnaires (LFT - Linear Fractional Transformations) de la boucle fermée non-linéaire peuvent être facilement déduites pour l'analyse de la stabilité robuste en utilisant des outils standards pour de systèmes linéaires. La méthode proposée est testée pour le pilotage d'un véhicule de rentrée atmosphérique de type aile delta lors de ses phases hypersonique, transsonique et subsonique. Pour cette thèse, un simulateur du vol incluant divers facteurs externes ainsi que des erreurs de modélisation a été développé dans Simulink.\n\nThis thesis work is devoted to extending Nonlinear Dynamic Inversion (NDI) for a large scale of\nnonlinear systems while guaranteeing sufficient stability conditions. NDI has been studied in a wide range of applications, including aeronautics and aerospace. It allows to compute nonlinear control laws able to decouple and linearize a model at any operating point of its state envelope. However, this method is inherently non-robust to modelling errors and input saturations. Moreover, obtaining a quantifiable guarantee of the attained stability domain in a nonlinear control context is not a very straightforward task. Unlike standard NDI approaches, our methodology can be viewed as a generalized nonlinear compensation framework which allows to incorporate uncertainties and input saturations in the design process. Paralleling anti-windup strategies, the controller can be computed through a single multichannel optimization problem or through a simple two-step process. Within this framework, linear fractional transformations of the nonlinear closed-loop can be easily derived for robust stability analysis using standard tools for linear systems. The proposed method is tested for the flight control of a delta wing type reentry vehicle at hypersonic, transonic and subsonic phases of the atmospheric reentry. For this thesis work, a Flight Mechanics simulator including diverse external factors and modelling errors was developed in Simulink."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|artemis___fr::630e47d8b572e3df0e91327d6d8f036d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"distributionlocation":"","hostedby":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://depozit.isae.fr/theses/2012/2012_Hernandez_Lopezomoza_Mario_Andres.pdf"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720142745,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http://ori-oai.isae.fr/ori-oai-repository/OAIHandler","datestamp":"2013-06-13","harvestDate":"2016-02-26T12:03:21.28Z","identifier":"oai:isae-repo.fr:isae-371","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:isae-repo.fr:isae-371"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Compensation non-linéaire généralisée"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Inversion dynamic non-linéaire"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande anti-windup"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande robuste"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Transformations linéaires fractionnaires"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande H-infinie non-lisse"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Generalized nonlinear compensation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Nonlinear dynamic inversion"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Anti-windup control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Robust control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Linear fractional transformation, Nonsmooth H-infinity control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Atmospheric reentry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"629.8"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cadre de travail généralisé de compensation non-linéaire robuste : application à la rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A generalized framework for robust nonlinear compensation : application to an atmospheric reentry control problem"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json deleted file mode 100644 index f849811ab..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json +++ /dev/null @@ -1,18 +0,0 @@ -{"author":[{"fullname":"Niskala, Eino","name":"Eino","pid":[],"rank":1,"surname":"Niskala"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.375Z","dateoftransformation":"2020-10-14T13:55:20.918Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::2cfa9f434e854612c7cbdeb43433ac24","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719957520,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-30T11:16:46Z","harvestDate":"2020-10-14T13:09:13.375Z","identifier":"oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Niskala , E 1988 , ' Puutalon ulkovaipan korjaus ' , Kodinrakentaja , no. 3 , pp. 57-60 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puutalon ulkovaipan korjaus"}]} -{"author":[{"fullname":"Ikonen, Kari","name":"Kari","pid":[],"rank":1,"surname":"Ikonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"dateofcollection":"2020-10-14T13:13:18.619Z","dateoftransformation":"2020-10-14T14:38:03.661Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::7bba8d87fe65db9b20219f5d3ed6e7c5","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/aad07a47-f575-4696-9323-826722e44745"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720067542,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-22T07:59:56Z","harvestDate":"2020-10-14T13:13:18.619Z","identifier":"oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ikonen , K 1986 , ' Rakenneanalyysiohjelmat ' , Paper presented at Ydinvoimalaitosten turvallisuustutkimuksessa Suomessa käytetyt tietokoneohjelmat , Lappeenranta , Finland , 1/01/86 - 31/05/86 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/v1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"V1 Non-published/full refereed conference article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Rakenneanalyysiohjelmat"}]} -{"author":[{"fullname":"Home, Silja","name":"Silja","pid":[],"rank":1,"surname":"Home"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"dateofcollection":"2020-10-14T13:09:44.334Z","dateoftransformation":"2020-10-14T14:40:24.929Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::810ab952d864911e203aaa1a6350e297","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720074531,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-24T10:28:45Z","harvestDate":"2020-10-14T13:09:44.334Z","identifier":"oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Home , S 1993 , ' Oluen kemiaa ' , Dimensio , vol. 57 , no. 5 , pp. 10-15 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Oluen kemiaa"}]} -{"author":[{"fullname":"Mattila, Sakari","name":"Sakari","pid":[],"rank":1,"surname":"Mattila"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2020-10-14T13:09:40.962Z","dateoftransformation":"2020-10-14T14:46:53.279Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::8b1f6bc7e8243f4438937be16e76d8d0","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720088014,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-27T09:00:30Z","harvestDate":"2020-10-14T13:09:40.962Z","identifier":"oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mattila , S 1991 , ' Puoliksi avointa ' , Tietotekniikka , vol. 37 , no. 5 , 21 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puoliksi avointa"}]} -{"author":[{"fullname":"Viitaniemi, Pertti","name":"Pertti","pid":[],"rank":1,"surname":"Viitaniemi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.348Z","dateoftransformation":"2020-10-14T16:58:47.202Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f1fa9e5d1ba36533cb0afb54538a6b09","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719769155,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-22T10:46:50Z","harvestDate":"2020-10-14T13:09:13.348Z","identifier":"oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Viitaniemi , P 1988 , ' Puun kierteisyys ja sen vaikutus sahatavaran laatuun ' , Sahamies , no. 9 , pp. 260-264 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puun kierteisyys ja sen vaikutus sahatavaran laatuun"}]} -{"author":[{"fullname":"Varjonen, Suvi","name":"Suvi","pid":[],"rank":1,"surname":"Varjonen"},{"fullname":"Laaksonen, Päivi","name":"Päivi","pid":[],"rank":2,"surname":"Laaksonen"},{"fullname":"Paananen, Arja","name":"Arja","pid":[],"rank":3,"surname":"Paananen"},{"fullname":"Valo, Hanna","name":"Hanna","pid":[],"rank":4,"surname":"Valo"},{"fullname":"Hähl, Hendrik","name":"Hendrik","pid":[],"rank":5,"surname":"Hähl"},{"fullname":"Laaksonen, Timo","name":"Timo","pid":[],"rank":6,"surname":"Laaksonen"},{"fullname":"Linder, Markus","name":"Markus","pid":[],"rank":7,"surname":"Linder"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"dateofcollection":"2020-10-14T13:00:59.594Z","dateoftransformation":"2020-10-14T17:14:32.702Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"One central problem for the function and manufacture of materials where performance relies on nanoscale structure is to control the compatibility and interactions of the building blocks. In natural materials, such as nacre, there are examples of multifunctional macromolecules that have combined binding affinities for different materials within the same molecule, thereby bridging these materials and acting as a molecular glue. Here, we describe the use of a designed multifunctional protein that is used for self-assembly of nanofibrillar cellulose. Recent advances in the production of cellulose nanofibrils have given inspiration for new uses of cellulosic materials. Cellulose nanofibrils have mechanical and structural features that open new possibilities for performance in composites and other nanoscale materials. Functionalisation was realised through a bi-functional fusion protein having both an ability to bind to cellulose and a second functionality of surface activity. The cellulose-binding function was obtained using cellulose-binding domains from cellulolytic enzymes and the surface activity through the use of a surface active protein called hydrophobin. Using the bi-functional protein, cellulose nanofibrils could be assembled into tightly packed thin films at the air/water interface and at the oil/water interface. It was shown that the combination of protein and cellulose nanofibrils resulted in a synergistic improvement in the formation and stability of oil-in-water emulsions resulting in emulsions that were stable for several months. The bi-functionality of the protein also allowed the binding of hydrophobic solid drug nanoparticles to cellulose nanofibrils and thereby improving their long-term stability under physiological conditions."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5521b424-20a0-4f8c-8c70-505af50c5fef","https://doi.org/10.1039/C0SM01114B"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719787721,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-24T01:09:04Z","harvestDate":"2020-10-14T13:00:59.594Z","identifier":"oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.buildenv.2010.01.006"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value": "10.1016/j.buildenv.2010.01.008"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Varjonen , S , Laaksonen , P , Paananen , A , Valo , H , Hähl , H , Laaksonen , T & Linder , M 2011 , ' Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins ' , Soft Matter , vol. 7 , no. 6 , pp. 2402-2411 . https://doi.org/10.1039/C0SM01114B"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/a1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"A1 Refereed journal article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins"}]} -{"author":[{"fullname":"Macharia, Bodia","name":"Bodia","pid":[],"rank":1,"surname":"Macharia"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Crosta, Suzanne"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"French"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"dateofcollection":"2020-09-30T05:06:26.491Z","dateoftransformation":"2020-10-09T05:07:41.329Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

Ce qui nous frappe en premier lieu dans le roman de Melchior Mbonimpa, Le Totem des Baranda, c'est l'efficacité de sa fonction didactique. Le livre est porteur d'un message, d'une vision sur l'histoire de l'Afrique des Grands Lacs qu'on rencontre rarement dans l'actualité ou l'histoire immédiate que nous livrent les médias.

Cette thèse se penchera sur un aspect de ce roman qui a particulièrement attiré notre attention: la représentation de la résistance à travers l'affirmation identitaire des personnages féminins. Dans notre analyse de ce thème, nous accordons une grande importance au contexte socioculturel et à l'imaginaire collectif qui nourrissent l'auteur et dans lesquels s'inscrivent ses personnages. L'étude soulignera la fonction sociale de l'écrivain et relèvera la contribution de la fiction à la culture africaine. Nous mettrons en évidence les positions idéologiques de l'auteur et le message de sensibilisation qu'il adresse à diverses catégories de lecteurs: ceux qui se trouvent en Afrique, les Africains de la diaspora, et les lecteurs non-Africains qui s'intéressent à son œuvre. Cette orientation idéologique affecte évidemment la manière dont s'exprime la résistance des pnncIpaux personnages féminins du roman. Elle détermine la place que cette fiction accorde à la femme, et qui correspond sans doute à la place que, selon les souhaits de l'auteur, la femme devrait occuper dans le contexte culturel africain. Notre étude insiste aussi sur le fait que dans ce roman, la modernité se nourrit de la culture traditionnelle et ne renie pas les valeurs du passé.

Ainsi, le parcours narratif montre que dans leur résistance, les héroïnes de la généalogie vivent à leur époque, mais chacune porte plus loin une mission transmise par les précédentes.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::4ebf4c756904fa8a37615e44b1200332","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/10605"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720429600,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:27Z","harvestDate":"2020-09-30T05:06:26.491Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/10605","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/10605"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Pour une renaissance généalogique: résistance féminine chez Melchior Mbonimpa"}]} -{"author":[{"fullname":"Krause, Walter Thomas","name":"Walter Thomas","pid":[],"rank":1,"surname":"Krause"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Datars, W.R."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Physics"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"dateofcollection":"2020-09-30T05:06:17.843Z","dateoftransformation":"2020-10-09T05:07:43.404Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

The superconducting transition in the magnetic resistance of members of the family of bismuth based high temperature superconductors was investigated. Measurements were performed in magnetic fields up to 1.7 T. Small current densities ranging from 0.03 A/cm² to 3.0 A/cm² were applied. The resistivity of Bi₂Sr₂CaCu₂Ox single crystals was analyzed in terms of the thermally activated flux flow expression, ρ =ρ₀U/T exp(-U/T) where T is the temperature. It was found that the activation energy was given by Uα(Hsinθ)^(⁻α) where α≈1/3 and that the prefactor had the form, ρ₀Hsinθ, where H was the applied field and θ the angle of the field with respect to the CuO₂ planes. Results demonstrated that dissipation could be accounted for by the motion of two-dimensional vortices whose density is given by the field, Hsinθ, projected on the CuO₂ planes. Measurements of the resistivity and current dependent resistivity were performed with two Sn-doped and two Sb-doped polycrystalline Bi(1.7)Pb(0.3)Sr₂Ca₂Cu₃O(y) samples. Features in the temperature derivative of the resistivity curves were associated with the presence of a superconducting transition between superconducting grains, coupled by weak links with a distribution of critical currents and critical temperatures, and the superconducting transition within grains. The transition between grains was more strongly suppressed in temperature with the application of a magnetic field in samples with weaker coupling between grains. The presence of a transition in a magnetic field due to weak links between grains was verified at 77 K by the observation of a current dependent resistivity in a magnetic field. Measurements of a Bi₂Sr₂CaCu₂Ox diffusion grown thick film ring were done. The transverse voltage, the voltage at the centre of a 120 μm thick branch with respect to the centre of a 76 μm thick branch, was measured. A higher critical temperature from the presence of more texturing in the 76 μm branch as determined by separate resistivity, x-ray and scanning electron microscopy measurements was consistent with the measurement of a crossover from a negative to positive transverse voltage as the temperature of the sample went through its superconducting transition.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Doctor of Philosophy (PhD)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::57e9c0e7f2803e74fef30e18bab5e450","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/8621"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720431464,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:31:35Z","harvestDate":"2020-09-30T05:06:17.843Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/8621","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/8621"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Magnetic field resistivity of superconducting bismuth oxides"}]} -{"author":[{"fullname":"Sharp, Jeremy","name":"Jeremy","pid":[],"rank":1,"surname":"Sharp"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Savage, A."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"English"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"dateofcollection":"2020-09-30T05:06:27.768Z","dateoftransformation":"2020-10-09T05:07:56.297Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

This study examines Leonard Cohen's novel Beautiful Losers through the lenses of allegorical and authorial theories to appreciate how the novel uses allegorical techniques to code into symbolic terms an exploration of the polysemous nature of the word ''translation.'' The first chapter studies the stylistic and conceptual dimensions of allegory as a literary genre - as critics like Northrop Frye, Angus Fletchet, and Maureen Quilligan help to define it - while arguing that Cohen's novel is consciously allegorical, challenging readers to interpret what it \"means,\" or may mean. The second chapter performs an intensive re-reading of Beautiful Losers, examining how the novel uses complex systems of verbal play (particularly puns) to coordinate a reunification of various dichotomies historical \"reality\"/imaginative myth, secularity/spirituality, enslavement/sanctification, among others - employed throughout the text. The thesis concludes that the novel is perpetually playing with various types of translation (spiritual, linguistic, physical, and so forth), affirming the need for emotionally-charged, devotional forms of expression (like song and prayer) over more clinical attempts to reorder or recreate the world and its inhabitants. Ultimately, this discussion argues that an understanding of the allegorical dimensions of Beautiful Losers may illuminate how Cohen's other works (particularly his songs) may be studied as attempts to associate word with voice, to emphasize the process of expression (translation) rather than just the finished product.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::8ab8cb6d096b31eb67b4aaf43ca2d75f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/11059"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720442126,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:51Z","harvestDate":"2020-09-30T05:06:27.768Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/11059","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/11059"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"''What'd I Say?\": Beautiful Losers' Allegory of Translation"}]} -{"author":[{"fullname":"Hetemäki, Ilari","name":"Ilari","pid":[],"rank":1,"surname":"Hetemäki"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:44.425Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/1850"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"3","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":"27"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720425037,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-08-27T12:42:47.579Z","identifier":"oai:journal.fi:article/1850","metadataNamespace":""}},"originalId":["oai:journal.fi:article/1850"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Lyhyesti"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Lyhyesti"}]} -{"author":[{"fullname":"Kulonen[-Korhonen], Ulla[-Maija]","pid":[],"rank":1}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:51.844Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvio Abondolo, Daniel Mario: Hungarian inflectional morphology Kielenainekset etuprosodinen (kieli: suomi, sivulla: 254) juuri (kieli: suomi, sivulla: 254) koodi (kieli: suomi, sivulla: 254) subjektikonjugaatio (kieli: suomi, sivulla: 255) takaprosodinen (kieli: suomi, sivulla: 254)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::4c60c95783c4b240747e52990e709573","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0015","classname":"Review","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/virittaja/article/view/38371"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"253","iss":"2","issnLinking":"","issnOnline":"2242-8828","issnPrinted":"0042-6806","name":"Virittäjä","sp":"253","vol":"94"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720430784,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:05Z","harvestDate":"2020-08-27T14:00:01.261Z","identifier":"oai:journal.fi:article/38371","metadataNamespace":""}},"originalId":["oai:journal.fi:article/38371"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kotikielen Seura"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Virittäjä"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuutta"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Tuore näkemys unkarin taivutusmorfologiasta"}]} -{"author":[{"fullname":"Kerppola-Pesu, Jenni","name":"Jenni","pid":[],"rank":1,"surname":"Kerppola-Pesu"},{"fullname":"Halme, Nina","name":"Nina","pid":[],"rank":2,"surname":"Halme"},{"fullname":"Pietilä, Anna-Maija","name":"Anna-Maija","pid":[],"rank":3,"surname":"Pietilä"},{"fullname":"Perälä, Marja-Leena","name":"Marja-Leena","pid":[],"rank":4,"surname":"Perälä"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:55.86Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vanhempien osallisuuden vahvistaminen sekä oikeus tulla kuulluksi ovat keskeisiä lasten palveluja ohjaavia periaatteita. Osallisuuden toteutumisessa on kuitenkin edelleen puutteita. Tämän tutkimuksen tarkoituksena oli selvittää päihdepalvelujen esimiesten käsityksiä siitä, miten päihdepalvelujen piirissä olevien vanhempien osallisuutta tuetaan. Osallisuuden tukemista arvioitiin työntekijöille modifiodulla Family Empowerment Scale -mittarilla (FES). Aineisto kerättiin päihdepalveluissa toimivilta esimiehiltä (n=372). Vastausprosentti oli 36. Taustamuuttujien perusteella määräytyvien vastaajaryhmien väliset erot analysoitiin riippumattomien otosten t-testillä sekä yksisuuntaisella varianssianalyysillä. Vanhempien osallisuuden tukeminen toteutui kohtuullisesti kaikissa toimipisteissä. Merkittävimmät kehittämiskohteet liittyivät perheiden riittämättömään tiedonsaantiin, heikkoihin palautteen antomahdollisuuksin, perheen ja henkilöstön välisen yhteistyön sekä vanhempien yhteiskunnallisten vaikutusmahdollisuuksien lisäämiseen. Vastaajien mukaan toimipisteen luonne oli yhteydessä osallisuuden tukemiseen päihdepalveluissa."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::5115f8bae044b12a72b0741673c66fcb","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/sla/article/view/47238"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"","issnPrinted":"0355-5097","name":"Sosiaalilääketieteellinen Aikakauslehti","sp":"","vol":"51"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720434259,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2015-07-02T10:20:48Z","harvestDate":"2020-08-27T13:08:26.705Z","identifier":"oai:journal.fi:article/47238","metadataNamespace":""}},"originalId":["oai:journal.fi:article/47238"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteen yhdistys ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteellinen Aikakauslehti"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Päihdepalvelujen piirissä olevien vanhempien osallisuuden tukeminen"}]} -{"author":[{"fullname":"Ritari, Katja","name":"Katja","pid":[],"rank":1,"surname":"Ritari"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:21.371Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::72070913a49aa49d3b5abc600f940893","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/scf/article/view/77169"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2242-4261","issnPrinted":"1795-097X","name":"Studia Celtica Fennica","sp":"","vol":"14"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720459568,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2018-12-05T13:07:12Z","harvestDate":"2020-08-27T14:57:55.374Z","identifier":"oai:journal.fi:article/77169","metadataNamespace":""}},"originalId":["oai:journal.fi:article/77169"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Finnish Society for Celtic Studies SFKS ry."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Studia Celtica Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Editorial"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Editorial"}]} -{"author":[{"fullname":"Hoffrén, Jukka","name":"Jukka","pid":[],"rank":1,"surname":"Hoffrén"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:16.036Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Suomalaista hyvinvointiyhteiskuntaa pidettiin pitkään koko kansakuntaa yhdistävänä menestystarinana. Hyvinvoinnin huippukohta saavutettiin 1990-luvun alussa, ja sen jälkeen tarina on saanut entistä enemmän säröjä. Uusien mittareiden mukaan suomalaisten hyvinvointi on polkenut paikallaan tai jopa alentunut, vaikka ruttokansantuotteella (BKT) mitattu talouskasvu onkin saatu jatkumaan voimakkaana. Suurimpia syitä hyvinvoinnin laskuun ovat tuloerojen kasvaminen, talouden ympäristöön kasautuvan kuormituksen kasvu sekä luonnonvarojen kiihtyvä kulutus. Jälkiteolliseen yhteiskuntaan siirtyminen muuttaa tuotanto- ja elämäntapoja sekä rikkoo aiempia uskomuksia perinteisen talouskasvun siunauksellisuudesta yhteiskunnalliselle kehitykselle."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::7709c0dd641ca56ada58c9378e156648","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/5022"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":"30"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720758508,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:32:18.573Z","identifier":"oai:journal.fi:article/5022","metadataNamespace":""}},"originalId":["oai:journal.fi:article/5022"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Suomalaisen hyvinvoinnin tarina"}]} -{"author":[{"fullname":"Siivonen, Katriina","name":"Katriina","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6302-1146"}],"rank":1,"surname":"Siivonen"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:57.958Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Yrsa Lindqvist (ed.) 2008. Tradition och turism på Åland. Att använda kulturarven. (Tradition and Tourism in Åland.) Meddelanden från Folkkultursarkivet 21. Skrifter utgivna av Svenska litteratursällskapet i Finland 711. Helsingfors: Svenska litteratursällskapet i Finland. 240 pp. III. ISBN 978-951-583-167-5."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::a974b7e5144d11e293162c96ff33a4f0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/ethnolfenn/article/view/65995"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"101","iss":"","issnLinking":"","issnOnline":"2489-4982","issnPrinted":"0355-1776","name":"Ethnologia Fennica","sp":"100","vol":"36"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720502596,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2017-09-29T12:07:21Z","harvestDate":"2020-08-27T15:37:26.591Z","identifier":"oai:journal.fi:article/65995","metadataNamespace":""}},"originalId":["oai:journal.fi:article/65995"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnos ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnologia Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Book Reviews"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cultural Heritage in Use in Åland"}]} -{"author":[{"fullname":"Portin, Petter","name":"Petter","pid":[],"rank":1,"surname":"Portin"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:50.546Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvostelu: Worldwatch-instituutti: Maailman tila 2008. Kestävä talous. Raportti kehityksestä kohti kestävää yhteiskuntaa. Suomentanut Jani Kaaro. Gaudeamus 2008."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::ad462fe2a7230b480118e7d8d37476d5","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/490"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":""},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720348067,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:28:00.546Z","identifier":"oai:journal.fi:article/490","metadataNamespace":""}},"originalId":["oai:journal.fi:article/490"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuus"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Ilmastonmuutos – vakava markkinahäiriö"}]} -{"author":[{"fullname":"Klaus Ostermann","pid":[],"rank":1},{"fullname":"Paolo Giarrusso","pid":[],"rank":2},{"fullname":"Christian Kästner","name":"Christian K.","pid":[],"rank":3,"surname":"Stner"},{"fullname":"Tillmann Rendel","pid":[],"rank":4}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"dateofcollection":"2015-01-20T00:00:00Z","dateoftransformation":"2016-03-12T12:49:39.741Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"distributionlocation":"","hostedby":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dl.acm.org/citation.cfm?id=2032509"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720265881,"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":""}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Revisiting information hiding"}]} -{"author":[{"fullname":"Hernandez Lopezomoza, Mario Andres","name":"Mario Andres","pid":[],"rank":1,"surname":"Hernandez Lopezomoza"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Institut Supérieur de l'Aéronautique et de l'Espace"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Biannic, Jean-Marc"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Jouhaud, Frank"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"dateofcollection":"2016-02-26T12:03:21.28Z","dateoftransformation":"2020-08-15T08:01:27.526Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ce travail de thèse est consacré à l'extension de l'Inversion Dynamique non-linéaire (NDI-Nonlinear Dynamic Inversion) pour un ensemble plus grand de systèmes non-linéaires, tout en garantissant des conditions de stabilité suffisantes. La NDI a été étudiée dans le cas de diverses applications, y compris en aéronautique et en aérospatiale. Elle permet de calculer des lois de contrôle capables de linéariser et de découpler un modèle non-linéaire à tout point de fonctionnement de son enveloppe d'état. Cependant cette méthode est intrinsèquement non-robuste aux erreurs de modélisation et aux saturations en entrée. En outre, dans un contexte non-linéaire, l'obtention d'une garantie quantifiable du domaine de stabilité atteint reste à l'heure actuelle complexe. Contrairement aux approches classiques de la NDI, notre méthodologie peut être considérée comme un cadre de compensation non-linéaire généralisé qui permet d'intégrer les incertitudes et les saturations en entrée dans le processus de conception. En utilisant des stratégies de contrôle antiwindup, la loi de pilotage peut être calculée grâce à un simple processus en deux phases. Dans ce cadre de travail généralisé des transformations linéaires fractionnaires (LFT - Linear Fractional Transformations) de la boucle fermée non-linéaire peuvent être facilement déduites pour l'analyse de la stabilité robuste en utilisant des outils standards pour de systèmes linéaires. La méthode proposée est testée pour le pilotage d'un véhicule de rentrée atmosphérique de type aile delta lors de ses phases hypersonique, transsonique et subsonique. Pour cette thèse, un simulateur du vol incluant divers facteurs externes ainsi que des erreurs de modélisation a été développé dans Simulink.\n\nThis thesis work is devoted to extending Nonlinear Dynamic Inversion (NDI) for a large scale of\nnonlinear systems while guaranteeing sufficient stability conditions. NDI has been studied in a wide range of applications, including aeronautics and aerospace. It allows to compute nonlinear control laws able to decouple and linearize a model at any operating point of its state envelope. However, this method is inherently non-robust to modelling errors and input saturations. Moreover, obtaining a quantifiable guarantee of the attained stability domain in a nonlinear control context is not a very straightforward task. Unlike standard NDI approaches, our methodology can be viewed as a generalized nonlinear compensation framework which allows to incorporate uncertainties and input saturations in the design process. Paralleling anti-windup strategies, the controller can be computed through a single multichannel optimization problem or through a simple two-step process. Within this framework, linear fractional transformations of the nonlinear closed-loop can be easily derived for robust stability analysis using standard tools for linear systems. The proposed method is tested for the flight control of a delta wing type reentry vehicle at hypersonic, transonic and subsonic phases of the atmospheric reentry. For this thesis work, a Flight Mechanics simulator including diverse external factors and modelling errors was developed in Simulink."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|artemis___fr::630e47d8b572e3df0e91327d6d8f036d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"distributionlocation":"","hostedby":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://depozit.isae.fr/theses/2012/2012_Hernandez_Lopezomoza_Mario_Andres.pdf"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720142745,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http://ori-oai.isae.fr/ori-oai-repository/OAIHandler","datestamp":"2013-06-13","harvestDate":"2016-02-26T12:03:21.28Z","identifier":"oai:isae-repo.fr:isae-371","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:isae-repo.fr:isae-371"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Compensation non-linéaire généralisée"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Inversion dynamic non-linéaire"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande anti-windup"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande robuste"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Transformations linéaires fractionnaires"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande H-infinie non-lisse"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Generalized nonlinear compensation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Nonlinear dynamic inversion"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Anti-windup control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Robust control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Linear fractional transformation, Nonsmooth H-infinity control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Atmospheric reentry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"629.8"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cadre de travail généralisé de compensation non-linéaire robuste : application à la rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A generalized framework for robust nonlinear compensation : application to an atmospheric reentry control problem"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json deleted file mode 100644 index 521f4959c..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json +++ /dev/null @@ -1,18 +0,0 @@ -{"author":[{"fullname":"Niskala, Eino","name":"Eino","pid":[],"rank":1,"surname":"Niskala"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.375Z","dateoftransformation":"2020-10-14T13:55:20.918Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::2cfa9f434e854612c7cbdeb43433ac24","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719957520,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-30T11:16:46Z","harvestDate":"2020-10-14T13:09:13.375Z","identifier":"oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Niskala , E 1988 , ' Puutalon ulkovaipan korjaus ' , Kodinrakentaja , no. 3 , pp. 57-60 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puutalon ulkovaipan korjaus"}]} -{"author":[{"fullname":"Ikonen, Kari","name":"Kari","pid":[],"rank":1,"surname":"Ikonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"dateofcollection":"2020-10-14T13:13:18.619Z","dateoftransformation":"2020-10-14T14:38:03.661Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::7bba8d87fe65db9b20219f5d3ed6e7c5","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/aad07a47-f575-4696-9323-826722e44745"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720067542,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-22T07:59:56Z","harvestDate":"2020-10-14T13:13:18.619Z","identifier":"oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ikonen , K 1986 , ' Rakenneanalyysiohjelmat ' , Paper presented at Ydinvoimalaitosten turvallisuustutkimuksessa Suomessa käytetyt tietokoneohjelmat , Lappeenranta , Finland , 1/01/86 - 31/05/86 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/v1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"V1 Non-published/full refereed conference article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Rakenneanalyysiohjelmat"}]} -{"author":[{"fullname":"Home, Silja","name":"Silja","pid":[],"rank":1,"surname":"Home"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"dateofcollection":"2020-10-14T13:09:44.334Z","dateoftransformation":"2020-10-14T14:40:24.929Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::810ab952d864911e203aaa1a6350e297","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720074531,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-24T10:28:45Z","harvestDate":"2020-10-14T13:09:44.334Z","identifier":"oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Home , S 1993 , ' Oluen kemiaa ' , Dimensio , vol. 57 , no. 5 , pp. 10-15 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Oluen kemiaa"}]} -{"author":[{"fullname":"Mattila, Sakari","name":"Sakari","pid":[],"rank":1,"surname":"Mattila"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2020-10-14T13:09:40.962Z","dateoftransformation":"2020-10-14T14:46:53.279Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::8b1f6bc7e8243f4438937be16e76d8d0","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720088014,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-27T09:00:30Z","harvestDate":"2020-10-14T13:09:40.962Z","identifier":"oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mattila , S 1991 , ' Puoliksi avointa ' , Tietotekniikka , vol. 37 , no. 5 , 21 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puoliksi avointa"}]} -{"author":[{"fullname":"Viitaniemi, Pertti","name":"Pertti","pid":[],"rank":1,"surname":"Viitaniemi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.348Z","dateoftransformation":"2020-10-14T16:58:47.202Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f1fa9e5d1ba36533cb0afb54538a6b09","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719769155,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-22T10:46:50Z","harvestDate":"2020-10-14T13:09:13.348Z","identifier":"oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Viitaniemi , P 1988 , ' Puun kierteisyys ja sen vaikutus sahatavaran laatuun ' , Sahamies , no. 9 , pp. 260-264 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puun kierteisyys ja sen vaikutus sahatavaran laatuun"}]} -{"author":[{"fullname":"Varjonen, Suvi","name":"Suvi","pid":[],"rank":1,"surname":"Varjonen"},{"fullname":"Laaksonen, Päivi","name":"Päivi","pid":[],"rank":2,"surname":"Laaksonen"},{"fullname":"Paananen, Arja","name":"Arja","pid":[],"rank":3,"surname":"Paananen"},{"fullname":"Valo, Hanna","name":"Hanna","pid":[],"rank":4,"surname":"Valo"},{"fullname":"Hähl, Hendrik","name":"Hendrik","pid":[],"rank":5,"surname":"Hähl"},{"fullname":"Laaksonen, Timo","name":"Timo","pid":[],"rank":6,"surname":"Laaksonen"},{"fullname":"Linder, Markus","name":"Markus","pid":[],"rank":7,"surname":"Linder"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"dateofcollection":"2020-10-14T13:00:59.594Z","dateoftransformation":"2020-10-14T17:14:32.702Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"One central problem for the function and manufacture of materials where performance relies on nanoscale structure is to control the compatibility and interactions of the building blocks. In natural materials, such as nacre, there are examples of multifunctional macromolecules that have combined binding affinities for different materials within the same molecule, thereby bridging these materials and acting as a molecular glue. Here, we describe the use of a designed multifunctional protein that is used for self-assembly of nanofibrillar cellulose. Recent advances in the production of cellulose nanofibrils have given inspiration for new uses of cellulosic materials. Cellulose nanofibrils have mechanical and structural features that open new possibilities for performance in composites and other nanoscale materials. Functionalisation was realised through a bi-functional fusion protein having both an ability to bind to cellulose and a second functionality of surface activity. The cellulose-binding function was obtained using cellulose-binding domains from cellulolytic enzymes and the surface activity through the use of a surface active protein called hydrophobin. Using the bi-functional protein, cellulose nanofibrils could be assembled into tightly packed thin films at the air/water interface and at the oil/water interface. It was shown that the combination of protein and cellulose nanofibrils resulted in a synergistic improvement in the formation and stability of oil-in-water emulsions resulting in emulsions that were stable for several months. The bi-functionality of the protein also allowed the binding of hydrophobic solid drug nanoparticles to cellulose nanofibrils and thereby improving their long-term stability under physiological conditions."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5521b424-20a0-4f8c-8c70-505af50c5fef","https://doi.org/10.1039/C0SM01114B"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719787721,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-24T01:09:04Z","harvestDate":"2020-10-14T13:00:59.594Z","identifier":"oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.buildenv.2010.01.006"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Varjonen , S , Laaksonen , P , Paananen , A , Valo , H , Hähl , H , Laaksonen , T & Linder , M 2011 , ' Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins ' , Soft Matter , vol. 7 , no. 6 , pp. 2402-2411 . https://doi.org/10.1039/C0SM01114B"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/a1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"A1 Refereed journal article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins"}]} -{"author":[{"fullname":"Macharia, Bodia","name":"Bodia","pid":[],"rank":1,"surname":"Macharia"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Crosta, Suzanne"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"French"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"dateofcollection":"2020-09-30T05:06:26.491Z","dateoftransformation":"2020-10-09T05:07:41.329Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

Ce qui nous frappe en premier lieu dans le roman de Melchior Mbonimpa, Le Totem des Baranda, c'est l'efficacité de sa fonction didactique. Le livre est porteur d'un message, d'une vision sur l'histoire de l'Afrique des Grands Lacs qu'on rencontre rarement dans l'actualité ou l'histoire immédiate que nous livrent les médias.

Cette thèse se penchera sur un aspect de ce roman qui a particulièrement attiré notre attention: la représentation de la résistance à travers l'affirmation identitaire des personnages féminins. Dans notre analyse de ce thème, nous accordons une grande importance au contexte socioculturel et à l'imaginaire collectif qui nourrissent l'auteur et dans lesquels s'inscrivent ses personnages. L'étude soulignera la fonction sociale de l'écrivain et relèvera la contribution de la fiction à la culture africaine. Nous mettrons en évidence les positions idéologiques de l'auteur et le message de sensibilisation qu'il adresse à diverses catégories de lecteurs: ceux qui se trouvent en Afrique, les Africains de la diaspora, et les lecteurs non-Africains qui s'intéressent à son œuvre. Cette orientation idéologique affecte évidemment la manière dont s'exprime la résistance des pnncIpaux personnages féminins du roman. Elle détermine la place que cette fiction accorde à la femme, et qui correspond sans doute à la place que, selon les souhaits de l'auteur, la femme devrait occuper dans le contexte culturel africain. Notre étude insiste aussi sur le fait que dans ce roman, la modernité se nourrit de la culture traditionnelle et ne renie pas les valeurs du passé.

Ainsi, le parcours narratif montre que dans leur résistance, les héroïnes de la généalogie vivent à leur époque, mais chacune porte plus loin une mission transmise par les précédentes.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::4ebf4c756904fa8a37615e44b1200332","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/10605"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720429600,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:27Z","harvestDate":"2020-09-30T05:06:26.491Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/10605","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/10605"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Pour une renaissance généalogique: résistance féminine chez Melchior Mbonimpa"}]} -{"author":[{"fullname":"Krause, Walter Thomas","name":"Walter Thomas","pid":[],"rank":1,"surname":"Krause"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Datars, W.R."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Physics"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"dateofcollection":"2020-09-30T05:06:17.843Z","dateoftransformation":"2020-10-09T05:07:43.404Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

The superconducting transition in the magnetic resistance of members of the family of bismuth based high temperature superconductors was investigated. Measurements were performed in magnetic fields up to 1.7 T. Small current densities ranging from 0.03 A/cm² to 3.0 A/cm² were applied. The resistivity of Bi₂Sr₂CaCu₂Ox single crystals was analyzed in terms of the thermally activated flux flow expression, ρ =ρ₀U/T exp(-U/T) where T is the temperature. It was found that the activation energy was given by Uα(Hsinθ)^(⁻α) where α≈1/3 and that the prefactor had the form, ρ₀Hsinθ, where H was the applied field and θ the angle of the field with respect to the CuO₂ planes. Results demonstrated that dissipation could be accounted for by the motion of two-dimensional vortices whose density is given by the field, Hsinθ, projected on the CuO₂ planes. Measurements of the resistivity and current dependent resistivity were performed with two Sn-doped and two Sb-doped polycrystalline Bi(1.7)Pb(0.3)Sr₂Ca₂Cu₃O(y) samples. Features in the temperature derivative of the resistivity curves were associated with the presence of a superconducting transition between superconducting grains, coupled by weak links with a distribution of critical currents and critical temperatures, and the superconducting transition within grains. The transition between grains was more strongly suppressed in temperature with the application of a magnetic field in samples with weaker coupling between grains. The presence of a transition in a magnetic field due to weak links between grains was verified at 77 K by the observation of a current dependent resistivity in a magnetic field. Measurements of a Bi₂Sr₂CaCu₂Ox diffusion grown thick film ring were done. The transverse voltage, the voltage at the centre of a 120 μm thick branch with respect to the centre of a 76 μm thick branch, was measured. A higher critical temperature from the presence of more texturing in the 76 μm branch as determined by separate resistivity, x-ray and scanning electron microscopy measurements was consistent with the measurement of a crossover from a negative to positive transverse voltage as the temperature of the sample went through its superconducting transition.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Doctor of Philosophy (PhD)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::57e9c0e7f2803e74fef30e18bab5e450","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/8621"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720431464,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:31:35Z","harvestDate":"2020-09-30T05:06:17.843Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/8621","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/8621"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Magnetic field resistivity of superconducting bismuth oxides"}]} -{"author":[{"fullname":"Sharp, Jeremy","name":"Jeremy","pid":[],"rank":1,"surname":"Sharp"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Savage, A."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"English"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"dateofcollection":"2020-09-30T05:06:27.768Z","dateoftransformation":"2020-10-09T05:07:56.297Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

This study examines Leonard Cohen's novel Beautiful Losers through the lenses of allegorical and authorial theories to appreciate how the novel uses allegorical techniques to code into symbolic terms an exploration of the polysemous nature of the word ''translation.'' The first chapter studies the stylistic and conceptual dimensions of allegory as a literary genre - as critics like Northrop Frye, Angus Fletchet, and Maureen Quilligan help to define it - while arguing that Cohen's novel is consciously allegorical, challenging readers to interpret what it \"means,\" or may mean. The second chapter performs an intensive re-reading of Beautiful Losers, examining how the novel uses complex systems of verbal play (particularly puns) to coordinate a reunification of various dichotomies historical \"reality\"/imaginative myth, secularity/spirituality, enslavement/sanctification, among others - employed throughout the text. The thesis concludes that the novel is perpetually playing with various types of translation (spiritual, linguistic, physical, and so forth), affirming the need for emotionally-charged, devotional forms of expression (like song and prayer) over more clinical attempts to reorder or recreate the world and its inhabitants. Ultimately, this discussion argues that an understanding of the allegorical dimensions of Beautiful Losers may illuminate how Cohen's other works (particularly his songs) may be studied as attempts to associate word with voice, to emphasize the process of expression (translation) rather than just the finished product.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::8ab8cb6d096b31eb67b4aaf43ca2d75f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/11059"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720442126,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:51Z","harvestDate":"2020-09-30T05:06:27.768Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/11059","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/11059"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"''What'd I Say?\": Beautiful Losers' Allegory of Translation"}]} -{"author":[{"fullname":"Hetemäki, Ilari","name":"Ilari","pid":[],"rank":1,"surname":"Hetemäki"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:44.425Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/1850"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"3","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":"27"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720425037,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-08-27T12:42:47.579Z","identifier":"oai:journal.fi:article/1850","metadataNamespace":""}},"originalId":["oai:journal.fi:article/1850"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Lyhyesti"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Lyhyesti"}]} -{"author":[{"fullname":"Kulonen[-Korhonen], Ulla[-Maija]","pid":[],"rank":1}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:51.844Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvio Abondolo, Daniel Mario: Hungarian inflectional morphology Kielenainekset etuprosodinen (kieli: suomi, sivulla: 254) juuri (kieli: suomi, sivulla: 254) koodi (kieli: suomi, sivulla: 254) subjektikonjugaatio (kieli: suomi, sivulla: 255) takaprosodinen (kieli: suomi, sivulla: 254)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::4c60c95783c4b240747e52990e709573","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0015","classname":"Review","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/virittaja/article/view/38371"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"253","iss":"2","issnLinking":"","issnOnline":"2242-8828","issnPrinted":"0042-6806","name":"Virittäjä","sp":"253","vol":"94"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720430784,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:05Z","harvestDate":"2020-08-27T14:00:01.261Z","identifier":"oai:journal.fi:article/38371","metadataNamespace":""}},"originalId":["oai:journal.fi:article/38371"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kotikielen Seura"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Virittäjä"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuutta"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Tuore näkemys unkarin taivutusmorfologiasta"}]} -{"author":[{"fullname":"Kerppola-Pesu, Jenni","name":"Jenni","pid":[],"rank":1,"surname":"Kerppola-Pesu"},{"fullname":"Halme, Nina","name":"Nina","pid":[],"rank":2,"surname":"Halme"},{"fullname":"Pietilä, Anna-Maija","name":"Anna-Maija","pid":[],"rank":3,"surname":"Pietilä"},{"fullname":"Perälä, Marja-Leena","name":"Marja-Leena","pid":[],"rank":4,"surname":"Perälä"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:55.86Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vanhempien osallisuuden vahvistaminen sekä oikeus tulla kuulluksi ovat keskeisiä lasten palveluja ohjaavia periaatteita. Osallisuuden toteutumisessa on kuitenkin edelleen puutteita. Tämän tutkimuksen tarkoituksena oli selvittää päihdepalvelujen esimiesten käsityksiä siitä, miten päihdepalvelujen piirissä olevien vanhempien osallisuutta tuetaan. Osallisuuden tukemista arvioitiin työntekijöille modifiodulla Family Empowerment Scale -mittarilla (FES). Aineisto kerättiin päihdepalveluissa toimivilta esimiehiltä (n=372). Vastausprosentti oli 36. Taustamuuttujien perusteella määräytyvien vastaajaryhmien väliset erot analysoitiin riippumattomien otosten t-testillä sekä yksisuuntaisella varianssianalyysillä. Vanhempien osallisuuden tukeminen toteutui kohtuullisesti kaikissa toimipisteissä. Merkittävimmät kehittämiskohteet liittyivät perheiden riittämättömään tiedonsaantiin, heikkoihin palautteen antomahdollisuuksin, perheen ja henkilöstön välisen yhteistyön sekä vanhempien yhteiskunnallisten vaikutusmahdollisuuksien lisäämiseen. Vastaajien mukaan toimipisteen luonne oli yhteydessä osallisuuden tukemiseen päihdepalveluissa."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::5115f8bae044b12a72b0741673c66fcb","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/sla/article/view/47238"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"","issnPrinted":"0355-5097","name":"Sosiaalilääketieteellinen Aikakauslehti","sp":"","vol":"51"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720434259,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2015-07-02T10:20:48Z","harvestDate":"2020-08-27T13:08:26.705Z","identifier":"oai:journal.fi:article/47238","metadataNamespace":""}},"originalId":["oai:journal.fi:article/47238"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteen yhdistys ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteellinen Aikakauslehti"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Päihdepalvelujen piirissä olevien vanhempien osallisuuden tukeminen"}]} -{"author":[{"fullname":"Ritari, Katja","name":"Katja","pid":[],"rank":1,"surname":"Ritari"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:21.371Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::72070913a49aa49d3b5abc600f940893","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/scf/article/view/77169"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2242-4261","issnPrinted":"1795-097X","name":"Studia Celtica Fennica","sp":"","vol":"14"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720459568,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2018-12-05T13:07:12Z","harvestDate":"2020-08-27T14:57:55.374Z","identifier":"oai:journal.fi:article/77169","metadataNamespace":""}},"originalId":["oai:journal.fi:article/77169"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Finnish Society for Celtic Studies SFKS ry."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Studia Celtica Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Editorial"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Editorial"}]} -{"author":[{"fullname":"Hoffrén, Jukka","name":"Jukka","pid":[],"rank":1,"surname":"Hoffrén"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:16.036Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Suomalaista hyvinvointiyhteiskuntaa pidettiin pitkään koko kansakuntaa yhdistävänä menestystarinana. Hyvinvoinnin huippukohta saavutettiin 1990-luvun alussa, ja sen jälkeen tarina on saanut entistä enemmän säröjä. Uusien mittareiden mukaan suomalaisten hyvinvointi on polkenut paikallaan tai jopa alentunut, vaikka ruttokansantuotteella (BKT) mitattu talouskasvu onkin saatu jatkumaan voimakkaana. Suurimpia syitä hyvinvoinnin laskuun ovat tuloerojen kasvaminen, talouden ympäristöön kasautuvan kuormituksen kasvu sekä luonnonvarojen kiihtyvä kulutus. Jälkiteolliseen yhteiskuntaan siirtyminen muuttaa tuotanto- ja elämäntapoja sekä rikkoo aiempia uskomuksia perinteisen talouskasvun siunauksellisuudesta yhteiskunnalliselle kehitykselle."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::7709c0dd641ca56ada58c9378e156648","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/5022"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":"30"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720758508,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:32:18.573Z","identifier":"oai:journal.fi:article/5022","metadataNamespace":""}},"originalId":["oai:journal.fi:article/5022"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Suomalaisen hyvinvoinnin tarina"}]} -{"author":[{"fullname":"Siivonen, Katriina","name":"Katriina","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6302-1146"}],"rank":1,"surname":"Siivonen"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:57.958Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Yrsa Lindqvist (ed.) 2008. Tradition och turism på Åland. Att använda kulturarven. (Tradition and Tourism in Åland.) Meddelanden från Folkkultursarkivet 21. Skrifter utgivna av Svenska litteratursällskapet i Finland 711. Helsingfors: Svenska litteratursällskapet i Finland. 240 pp. III. ISBN 978-951-583-167-5."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::a974b7e5144d11e293162c96ff33a4f0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/ethnolfenn/article/view/65995"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"101","iss":"","issnLinking":"","issnOnline":"2489-4982","issnPrinted":"0355-1776","name":"Ethnologia Fennica","sp":"100","vol":"36"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720502596,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2017-09-29T12:07:21Z","harvestDate":"2020-08-27T15:37:26.591Z","identifier":"oai:journal.fi:article/65995","metadataNamespace":""}},"originalId":["oai:journal.fi:article/65995"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnos ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnologia Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Book Reviews"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cultural Heritage in Use in Åland"}]} -{"author":[{"fullname":"Portin, Petter","name":"Petter","pid":[],"rank":1,"surname":"Portin"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:50.546Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvostelu: Worldwatch-instituutti: Maailman tila 2008. Kestävä talous. Raportti kehityksestä kohti kestävää yhteiskuntaa. Suomentanut Jani Kaaro. Gaudeamus 2008."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::ad462fe2a7230b480118e7d8d37476d5","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/490"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":""},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720348067,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:28:00.546Z","identifier":"oai:journal.fi:article/490","metadataNamespace":""}},"originalId":["oai:journal.fi:article/490"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuus"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Ilmastonmuutos – vakava markkinahäiriö"}]} -{"author":[{"fullname":"Klaus Ostermann","pid":[],"rank":1},{"fullname":"Paolo Giarrusso","pid":[],"rank":2},{"fullname":"Christian Kästner","name":"Christian K.","pid":[],"rank":3,"surname":"Stner"},{"fullname":"Tillmann Rendel","pid":[],"rank":4}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"dateofcollection":"2015-01-20T00:00:00Z","dateoftransformation":"2016-03-12T12:49:39.741Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"distributionlocation":"","hostedby":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dl.acm.org/citation.cfm?id=2032509"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720265881,"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.buildenv.2010.01.008"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Revisiting information hiding"}]} -{"author":[{"fullname":"Hernandez Lopezomoza, Mario Andres","name":"Mario Andres","pid":[],"rank":1,"surname":"Hernandez Lopezomoza"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Institut Supérieur de l'Aéronautique et de l'Espace"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Biannic, Jean-Marc"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Jouhaud, Frank"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"dateofcollection":"2016-02-26T12:03:21.28Z","dateoftransformation":"2020-08-15T08:01:27.526Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ce travail de thèse est consacré à l'extension de l'Inversion Dynamique non-linéaire (NDI-Nonlinear Dynamic Inversion) pour un ensemble plus grand de systèmes non-linéaires, tout en garantissant des conditions de stabilité suffisantes. La NDI a été étudiée dans le cas de diverses applications, y compris en aéronautique et en aérospatiale. Elle permet de calculer des lois de contrôle capables de linéariser et de découpler un modèle non-linéaire à tout point de fonctionnement de son enveloppe d'état. Cependant cette méthode est intrinsèquement non-robuste aux erreurs de modélisation et aux saturations en entrée. En outre, dans un contexte non-linéaire, l'obtention d'une garantie quantifiable du domaine de stabilité atteint reste à l'heure actuelle complexe. Contrairement aux approches classiques de la NDI, notre méthodologie peut être considérée comme un cadre de compensation non-linéaire généralisé qui permet d'intégrer les incertitudes et les saturations en entrée dans le processus de conception. En utilisant des stratégies de contrôle antiwindup, la loi de pilotage peut être calculée grâce à un simple processus en deux phases. Dans ce cadre de travail généralisé des transformations linéaires fractionnaires (LFT - Linear Fractional Transformations) de la boucle fermée non-linéaire peuvent être facilement déduites pour l'analyse de la stabilité robuste en utilisant des outils standards pour de systèmes linéaires. La méthode proposée est testée pour le pilotage d'un véhicule de rentrée atmosphérique de type aile delta lors de ses phases hypersonique, transsonique et subsonique. Pour cette thèse, un simulateur du vol incluant divers facteurs externes ainsi que des erreurs de modélisation a été développé dans Simulink.\n\nThis thesis work is devoted to extending Nonlinear Dynamic Inversion (NDI) for a large scale of\nnonlinear systems while guaranteeing sufficient stability conditions. NDI has been studied in a wide range of applications, including aeronautics and aerospace. It allows to compute nonlinear control laws able to decouple and linearize a model at any operating point of its state envelope. However, this method is inherently non-robust to modelling errors and input saturations. Moreover, obtaining a quantifiable guarantee of the attained stability domain in a nonlinear control context is not a very straightforward task. Unlike standard NDI approaches, our methodology can be viewed as a generalized nonlinear compensation framework which allows to incorporate uncertainties and input saturations in the design process. Paralleling anti-windup strategies, the controller can be computed through a single multichannel optimization problem or through a simple two-step process. Within this framework, linear fractional transformations of the nonlinear closed-loop can be easily derived for robust stability analysis using standard tools for linear systems. The proposed method is tested for the flight control of a delta wing type reentry vehicle at hypersonic, transonic and subsonic phases of the atmospheric reentry. For this thesis work, a Flight Mechanics simulator including diverse external factors and modelling errors was developed in Simulink."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|artemis___fr::630e47d8b572e3df0e91327d6d8f036d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"distributionlocation":"","hostedby":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://depozit.isae.fr/theses/2012/2012_Hernandez_Lopezomoza_Mario_Andres.pdf"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720142745,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http://ori-oai.isae.fr/ori-oai-repository/OAIHandler","datestamp":"2013-06-13","harvestDate":"2016-02-26T12:03:21.28Z","identifier":"oai:isae-repo.fr:isae-371","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:isae-repo.fr:isae-371"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Compensation non-linéaire généralisée"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Inversion dynamic non-linéaire"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande anti-windup"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande robuste"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Transformations linéaires fractionnaires"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande H-infinie non-lisse"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Generalized nonlinear compensation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Nonlinear dynamic inversion"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Anti-windup control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Robust control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Linear fractional transformation, Nonsmooth H-infinity control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Atmospheric reentry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"629.8"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cadre de travail généralisé de compensation non-linéaire robuste : application à la rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A generalized framework for robust nonlinear compensation : application to an atmospheric reentry control problem"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publicationoaid.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publicationoaid.json deleted file mode 100644 index 6a4506816..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publicationoaid.json +++ /dev/null @@ -1,18 +0,0 @@ -{"author":[{"fullname":"Niskala, Eino","name":"Eino","pid":[],"rank":1,"surname":"Niskala"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.375Z","dateoftransformation":"2020-10-14T13:55:20.918Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719957520,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-30T11:16:46Z","harvestDate":"2020-10-14T13:09:13.375Z","identifier":"oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Niskala , E 1988 , ' Puutalon ulkovaipan korjaus ' , Kodinrakentaja , no. 3 , pp. 57-60 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puutalon ulkovaipan korjaus"}]} -{"author":[{"fullname":"Ikonen, Kari","name":"Kari","pid":[],"rank":1,"surname":"Ikonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"dateofcollection":"2020-10-14T13:13:18.619Z","dateoftransformation":"2020-10-14T14:38:03.661Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::05b1f8ce98702f69d07aa5f0429de1e3","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/aad07a47-f575-4696-9323-826722e44745"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720067542,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-22T07:59:56Z","harvestDate":"2020-10-14T13:13:18.619Z","identifier":"oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ikonen , K 1986 , ' Rakenneanalyysiohjelmat ' , Paper presented at Ydinvoimalaitosten turvallisuustutkimuksessa Suomessa käytetyt tietokoneohjelmat , Lappeenranta , Finland , 1/01/86 - 31/05/86 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/v1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"V1 Non-published/full refereed conference article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Rakenneanalyysiohjelmat"}]} -{"author":[{"fullname":"Home, Silja","name":"Silja","pid":[],"rank":1,"surname":"Home"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"dateofcollection":"2020-10-14T13:09:44.334Z","dateoftransformation":"2020-10-14T14:40:24.929Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::08823c8f5c3ca2eae523817036cdda67","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720074531,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-24T10:28:45Z","harvestDate":"2020-10-14T13:09:44.334Z","identifier":"oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Home , S 1993 , ' Oluen kemiaa ' , Dimensio , vol. 57 , no. 5 , pp. 10-15 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Oluen kemiaa"}]} -{"author":[{"fullname":"Mattila, Sakari","name":"Sakari","pid":[],"rank":1,"surname":"Mattila"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2020-10-14T13:09:40.962Z","dateoftransformation":"2020-10-14T14:46:53.279Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::0e72b399325d6efcbe3271891a1dfe4c","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720088014,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-27T09:00:30Z","harvestDate":"2020-10-14T13:09:40.962Z","identifier":"oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mattila , S 1991 , ' Puoliksi avointa ' , Tietotekniikka , vol. 37 , no. 5 , 21 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puoliksi avointa"}]} -{"author":[{"fullname":"Viitaniemi, Pertti","name":"Pertti","pid":[],"rank":1,"surname":"Viitaniemi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.348Z","dateoftransformation":"2020-10-14T16:58:47.202Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f1fa9e5d1ba36533cb0afb54538a6b09","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719769155,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-22T10:46:50Z","harvestDate":"2020-10-14T13:09:13.348Z","identifier":"oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Viitaniemi , P 1988 , ' Puun kierteisyys ja sen vaikutus sahatavaran laatuun ' , Sahamies , no. 9 , pp. 260-264 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puun kierteisyys ja sen vaikutus sahatavaran laatuun"}]} -{"author":[{"fullname":"Varjonen, Suvi","name":"Suvi","pid":[],"rank":1,"surname":"Varjonen"},{"fullname":"Laaksonen, Päivi","name":"Päivi","pid":[],"rank":2,"surname":"Laaksonen"},{"fullname":"Paananen, Arja","name":"Arja","pid":[],"rank":3,"surname":"Paananen"},{"fullname":"Valo, Hanna","name":"Hanna","pid":[],"rank":4,"surname":"Valo"},{"fullname":"Hähl, Hendrik","name":"Hendrik","pid":[],"rank":5,"surname":"Hähl"},{"fullname":"Laaksonen, Timo","name":"Timo","pid":[],"rank":6,"surname":"Laaksonen"},{"fullname":"Linder, Markus","name":"Markus","pid":[],"rank":7,"surname":"Linder"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"dateofcollection":"2020-10-14T13:00:59.594Z","dateoftransformation":"2020-10-14T17:14:32.702Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"One central problem for the function and manufacture of materials where performance relies on nanoscale structure is to control the compatibility and interactions of the building blocks. In natural materials, such as nacre, there are examples of multifunctional macromolecules that have combined binding affinities for different materials within the same molecule, thereby bridging these materials and acting as a molecular glue. Here, we describe the use of a designed multifunctional protein that is used for self-assembly of nanofibrillar cellulose. Recent advances in the production of cellulose nanofibrils have given inspiration for new uses of cellulosic materials. Cellulose nanofibrils have mechanical and structural features that open new possibilities for performance in composites and other nanoscale materials. Functionalisation was realised through a bi-functional fusion protein having both an ability to bind to cellulose and a second functionality of surface activity. The cellulose-binding function was obtained using cellulose-binding domains from cellulolytic enzymes and the surface activity through the use of a surface active protein called hydrophobin. Using the bi-functional protein, cellulose nanofibrils could be assembled into tightly packed thin films at the air/water interface and at the oil/water interface. It was shown that the combination of protein and cellulose nanofibrils resulted in a synergistic improvement in the formation and stability of oil-in-water emulsions resulting in emulsions that were stable for several months. The bi-functionality of the protein also allowed the binding of hydrophobic solid drug nanoparticles to cellulose nanofibrils and thereby improving their long-term stability under physiological conditions."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5521b424-20a0-4f8c-8c70-505af50c5fef","https://doi.org/10.1039/C0SM01114B"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719787721,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-24T01:09:04Z","harvestDate":"2020-10-14T13:00:59.594Z","identifier":"oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.buildenv.2010.01.006"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Varjonen , S , Laaksonen , P , Paananen , A , Valo , H , Hähl , H , Laaksonen , T & Linder , M 2011 , ' Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins ' , Soft Matter , vol. 7 , no. 6 , pp. 2402-2411 . https://doi.org/10.1039/C0SM01114B"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/a1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"A1 Refereed journal article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins"}]} -{"author":[{"fullname":"Macharia, Bodia","name":"Bodia","pid":[],"rank":1,"surname":"Macharia"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Crosta, Suzanne"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"French"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"dateofcollection":"2020-09-30T05:06:26.491Z","dateoftransformation":"2020-10-09T05:07:41.329Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

Ce qui nous frappe en premier lieu dans le roman de Melchior Mbonimpa, Le Totem des Baranda, c'est l'efficacité de sa fonction didactique. Le livre est porteur d'un message, d'une vision sur l'histoire de l'Afrique des Grands Lacs qu'on rencontre rarement dans l'actualité ou l'histoire immédiate que nous livrent les médias.

Cette thèse se penchera sur un aspect de ce roman qui a particulièrement attiré notre attention: la représentation de la résistance à travers l'affirmation identitaire des personnages féminins. Dans notre analyse de ce thème, nous accordons une grande importance au contexte socioculturel et à l'imaginaire collectif qui nourrissent l'auteur et dans lesquels s'inscrivent ses personnages. L'étude soulignera la fonction sociale de l'écrivain et relèvera la contribution de la fiction à la culture africaine. Nous mettrons en évidence les positions idéologiques de l'auteur et le message de sensibilisation qu'il adresse à diverses catégories de lecteurs: ceux qui se trouvent en Afrique, les Africains de la diaspora, et les lecteurs non-Africains qui s'intéressent à son œuvre. Cette orientation idéologique affecte évidemment la manière dont s'exprime la résistance des pnncIpaux personnages féminins du roman. Elle détermine la place que cette fiction accorde à la femme, et qui correspond sans doute à la place que, selon les souhaits de l'auteur, la femme devrait occuper dans le contexte culturel africain. Notre étude insiste aussi sur le fait que dans ce roman, la modernité se nourrit de la culture traditionnelle et ne renie pas les valeurs du passé.

Ainsi, le parcours narratif montre que dans leur résistance, les héroïnes de la généalogie vivent à leur époque, mais chacune porte plus loin une mission transmise par les précédentes.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::4ebf4c756904fa8a37615e44b1200332","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/10605"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720429600,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:27Z","harvestDate":"2020-09-30T05:06:26.491Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/10605","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/10605"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Pour une renaissance généalogique: résistance féminine chez Melchior Mbonimpa"}]} -{"author":[{"fullname":"Krause, Walter Thomas","name":"Walter Thomas","pid":[],"rank":1,"surname":"Krause"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Datars, W.R."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Physics"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"dateofcollection":"2020-09-30T05:06:17.843Z","dateoftransformation":"2020-10-09T05:07:43.404Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

The superconducting transition in the magnetic resistance of members of the family of bismuth based high temperature superconductors was investigated. Measurements were performed in magnetic fields up to 1.7 T. Small current densities ranging from 0.03 A/cm² to 3.0 A/cm² were applied. The resistivity of Bi₂Sr₂CaCu₂Ox single crystals was analyzed in terms of the thermally activated flux flow expression, ρ =ρ₀U/T exp(-U/T) where T is the temperature. It was found that the activation energy was given by Uα(Hsinθ)^(⁻α) where α≈1/3 and that the prefactor had the form, ρ₀Hsinθ, where H was the applied field and θ the angle of the field with respect to the CuO₂ planes. Results demonstrated that dissipation could be accounted for by the motion of two-dimensional vortices whose density is given by the field, Hsinθ, projected on the CuO₂ planes. Measurements of the resistivity and current dependent resistivity were performed with two Sn-doped and two Sb-doped polycrystalline Bi(1.7)Pb(0.3)Sr₂Ca₂Cu₃O(y) samples. Features in the temperature derivative of the resistivity curves were associated with the presence of a superconducting transition between superconducting grains, coupled by weak links with a distribution of critical currents and critical temperatures, and the superconducting transition within grains. The transition between grains was more strongly suppressed in temperature with the application of a magnetic field in samples with weaker coupling between grains. The presence of a transition in a magnetic field due to weak links between grains was verified at 77 K by the observation of a current dependent resistivity in a magnetic field. Measurements of a Bi₂Sr₂CaCu₂Ox diffusion grown thick film ring were done. The transverse voltage, the voltage at the centre of a 120 μm thick branch with respect to the centre of a 76 μm thick branch, was measured. A higher critical temperature from the presence of more texturing in the 76 μm branch as determined by separate resistivity, x-ray and scanning electron microscopy measurements was consistent with the measurement of a crossover from a negative to positive transverse voltage as the temperature of the sample went through its superconducting transition.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Doctor of Philosophy (PhD)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::57e9c0e7f2803e74fef30e18bab5e450","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/8621"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720431464,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:31:35Z","harvestDate":"2020-09-30T05:06:17.843Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/8621","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/8621"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Magnetic field resistivity of superconducting bismuth oxides"}]} -{"author":[{"fullname":"Sharp, Jeremy","name":"Jeremy","pid":[],"rank":1,"surname":"Sharp"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Savage, A."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"English"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"dateofcollection":"2020-09-30T05:06:27.768Z","dateoftransformation":"2020-10-09T05:07:56.297Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

This study examines Leonard Cohen's novel Beautiful Losers through the lenses of allegorical and authorial theories to appreciate how the novel uses allegorical techniques to code into symbolic terms an exploration of the polysemous nature of the word ''translation.'' The first chapter studies the stylistic and conceptual dimensions of allegory as a literary genre - as critics like Northrop Frye, Angus Fletchet, and Maureen Quilligan help to define it - while arguing that Cohen's novel is consciously allegorical, challenging readers to interpret what it \"means,\" or may mean. The second chapter performs an intensive re-reading of Beautiful Losers, examining how the novel uses complex systems of verbal play (particularly puns) to coordinate a reunification of various dichotomies historical \"reality\"/imaginative myth, secularity/spirituality, enslavement/sanctification, among others - employed throughout the text. The thesis concludes that the novel is perpetually playing with various types of translation (spiritual, linguistic, physical, and so forth), affirming the need for emotionally-charged, devotional forms of expression (like song and prayer) over more clinical attempts to reorder or recreate the world and its inhabitants. Ultimately, this discussion argues that an understanding of the allegorical dimensions of Beautiful Losers may illuminate how Cohen's other works (particularly his songs) may be studied as attempts to associate word with voice, to emphasize the process of expression (translation) rather than just the finished product.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::8ab8cb6d096b31eb67b4aaf43ca2d75f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/11059"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720442126,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:51Z","harvestDate":"2020-09-30T05:06:27.768Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/11059","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/11059"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"''What'd I Say?\": Beautiful Losers' Allegory of Translation"}]} -{"author":[{"fullname":"Hetemäki, Ilari","name":"Ilari","pid":[],"rank":1,"surname":"Hetemäki"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:44.425Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/1850"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"3","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":"27"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720425037,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-08-27T12:42:47.579Z","identifier":"oai:journal.fi:article/1850","metadataNamespace":""}},"originalId":["oai:journal.fi:article/1850"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Lyhyesti"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Lyhyesti"}]} -{"author":[{"fullname":"Kulonen[-Korhonen], Ulla[-Maija]","pid":[],"rank":1}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:51.844Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvio Abondolo, Daniel Mario: Hungarian inflectional morphology Kielenainekset etuprosodinen (kieli: suomi, sivulla: 254) juuri (kieli: suomi, sivulla: 254) koodi (kieli: suomi, sivulla: 254) subjektikonjugaatio (kieli: suomi, sivulla: 255) takaprosodinen (kieli: suomi, sivulla: 254)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::4c60c95783c4b240747e52990e709573","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0015","classname":"Review","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/virittaja/article/view/38371"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"253","iss":"2","issnLinking":"","issnOnline":"2242-8828","issnPrinted":"0042-6806","name":"Virittäjä","sp":"253","vol":"94"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720430784,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:05Z","harvestDate":"2020-08-27T14:00:01.261Z","identifier":"oai:journal.fi:article/38371","metadataNamespace":""}},"originalId":["oai:journal.fi:article/38371"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kotikielen Seura"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Virittäjä"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuutta"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Tuore näkemys unkarin taivutusmorfologiasta"}]} -{"author":[{"fullname":"Kerppola-Pesu, Jenni","name":"Jenni","pid":[],"rank":1,"surname":"Kerppola-Pesu"},{"fullname":"Halme, Nina","name":"Nina","pid":[],"rank":2,"surname":"Halme"},{"fullname":"Pietilä, Anna-Maija","name":"Anna-Maija","pid":[],"rank":3,"surname":"Pietilä"},{"fullname":"Perälä, Marja-Leena","name":"Marja-Leena","pid":[],"rank":4,"surname":"Perälä"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:55.86Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vanhempien osallisuuden vahvistaminen sekä oikeus tulla kuulluksi ovat keskeisiä lasten palveluja ohjaavia periaatteita. Osallisuuden toteutumisessa on kuitenkin edelleen puutteita. Tämän tutkimuksen tarkoituksena oli selvittää päihdepalvelujen esimiesten käsityksiä siitä, miten päihdepalvelujen piirissä olevien vanhempien osallisuutta tuetaan. Osallisuuden tukemista arvioitiin työntekijöille modifiodulla Family Empowerment Scale -mittarilla (FES). Aineisto kerättiin päihdepalveluissa toimivilta esimiehiltä (n=372). Vastausprosentti oli 36. Taustamuuttujien perusteella määräytyvien vastaajaryhmien väliset erot analysoitiin riippumattomien otosten t-testillä sekä yksisuuntaisella varianssianalyysillä. Vanhempien osallisuuden tukeminen toteutui kohtuullisesti kaikissa toimipisteissä. Merkittävimmät kehittämiskohteet liittyivät perheiden riittämättömään tiedonsaantiin, heikkoihin palautteen antomahdollisuuksin, perheen ja henkilöstön välisen yhteistyön sekä vanhempien yhteiskunnallisten vaikutusmahdollisuuksien lisäämiseen. Vastaajien mukaan toimipisteen luonne oli yhteydessä osallisuuden tukemiseen päihdepalveluissa."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::5115f8bae044b12a72b0741673c66fcb","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/sla/article/view/47238"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"","issnPrinted":"0355-5097","name":"Sosiaalilääketieteellinen Aikakauslehti","sp":"","vol":"51"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720434259,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2015-07-02T10:20:48Z","harvestDate":"2020-08-27T13:08:26.705Z","identifier":"oai:journal.fi:article/47238","metadataNamespace":""}},"originalId":["oai:journal.fi:article/47238"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteen yhdistys ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteellinen Aikakauslehti"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Päihdepalvelujen piirissä olevien vanhempien osallisuuden tukeminen"}]} -{"author":[{"fullname":"Ritari, Katja","name":"Katja","pid":[],"rank":1,"surname":"Ritari"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:21.371Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::72070913a49aa49d3b5abc600f940893","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/scf/article/view/77169"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2242-4261","issnPrinted":"1795-097X","name":"Studia Celtica Fennica","sp":"","vol":"14"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720459568,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2018-12-05T13:07:12Z","harvestDate":"2020-08-27T14:57:55.374Z","identifier":"oai:journal.fi:article/77169","metadataNamespace":""}},"originalId":["oai:journal.fi:article/77169"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Finnish Society for Celtic Studies SFKS ry."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Studia Celtica Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Editorial"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Editorial"}]} -{"author":[{"fullname":"Hoffrén, Jukka","name":"Jukka","pid":[],"rank":1,"surname":"Hoffrén"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:16.036Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Suomalaista hyvinvointiyhteiskuntaa pidettiin pitkään koko kansakuntaa yhdistävänä menestystarinana. Hyvinvoinnin huippukohta saavutettiin 1990-luvun alussa, ja sen jälkeen tarina on saanut entistä enemmän säröjä. Uusien mittareiden mukaan suomalaisten hyvinvointi on polkenut paikallaan tai jopa alentunut, vaikka ruttokansantuotteella (BKT) mitattu talouskasvu onkin saatu jatkumaan voimakkaana. Suurimpia syitä hyvinvoinnin laskuun ovat tuloerojen kasvaminen, talouden ympäristöön kasautuvan kuormituksen kasvu sekä luonnonvarojen kiihtyvä kulutus. Jälkiteolliseen yhteiskuntaan siirtyminen muuttaa tuotanto- ja elämäntapoja sekä rikkoo aiempia uskomuksia perinteisen talouskasvun siunauksellisuudesta yhteiskunnalliselle kehitykselle."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::7709c0dd641ca56ada58c9378e156648","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/5022"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":"30"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720758508,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:32:18.573Z","identifier":"oai:journal.fi:article/5022","metadataNamespace":""}},"originalId":["oai:journal.fi:article/5022"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Suomalaisen hyvinvoinnin tarina"}]} -{"author":[{"fullname":"Siivonen, Katriina","name":"Katriina","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6302-1146"}],"rank":1,"surname":"Siivonen"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:57.958Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Yrsa Lindqvist (ed.) 2008. Tradition och turism på Åland. Att använda kulturarven. (Tradition and Tourism in Åland.) Meddelanden från Folkkultursarkivet 21. Skrifter utgivna av Svenska litteratursällskapet i Finland 711. Helsingfors: Svenska litteratursällskapet i Finland. 240 pp. III. ISBN 978-951-583-167-5."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::a974b7e5144d11e293162c96ff33a4f0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/ethnolfenn/article/view/65995"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"101","iss":"","issnLinking":"","issnOnline":"2489-4982","issnPrinted":"0355-1776","name":"Ethnologia Fennica","sp":"100","vol":"36"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720502596,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2017-09-29T12:07:21Z","harvestDate":"2020-08-27T15:37:26.591Z","identifier":"oai:journal.fi:article/65995","metadataNamespace":""}},"originalId":["oai:journal.fi:article/65995"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnos ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnologia Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Book Reviews"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cultural Heritage in Use in Åland"}]} -{"author":[{"fullname":"Portin, Petter","name":"Petter","pid":[],"rank":1,"surname":"Portin"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:50.546Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvostelu: Worldwatch-instituutti: Maailman tila 2008. Kestävä talous. Raportti kehityksestä kohti kestävää yhteiskuntaa. Suomentanut Jani Kaaro. Gaudeamus 2008."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::ad462fe2a7230b480118e7d8d37476d5","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/490"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä tapahtuu","sp":"","vol":""},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720348067,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:28:00.546Z","identifier":"oai:journal.fi:article/490","metadataNamespace":""}},"originalId":["oai:journal.fi:article/490"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuus"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Ilmastonmuutos – vakava markkinahäiriö"}]} -{"author":[{"fullname":"Klaus Ostermann","pid":[],"rank":1},{"fullname":"Paolo Giarrusso","pid":[],"rank":2},{"fullname":"Christian Kästner","name":"Christian K.","pid":[],"rank":3,"surname":"Stner"},{"fullname":"Tillmann Rendel","pid":[],"rank":4}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"dateofcollection":"2015-01-20T00:00:00Z","dateoftransformation":"2016-03-12T12:49:39.741Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"distributionlocation":"","hostedby":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dl.acm.org/citation.cfm?id=2032509"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720265881,"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":""}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Revisiting information hiding"}]} -{"author":[{"fullname":"Hernandez Lopezomoza, Mario Andres","name":"Mario Andres","pid":[],"rank":1,"surname":"Hernandez Lopezomoza"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Institut Supérieur de l'Aéronautique et de l'Espace"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Biannic, Jean-Marc"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Jouhaud, Frank"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"dateofcollection":"2016-02-26T12:03:21.28Z","dateoftransformation":"2020-08-15T08:01:27.526Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ce travail de thèse est consacré à l'extension de l'Inversion Dynamique non-linéaire (NDI-Nonlinear Dynamic Inversion) pour un ensemble plus grand de systèmes non-linéaires, tout en garantissant des conditions de stabilité suffisantes. La NDI a été étudiée dans le cas de diverses applications, y compris en aéronautique et en aérospatiale. Elle permet de calculer des lois de contrôle capables de linéariser et de découpler un modèle non-linéaire à tout point de fonctionnement de son enveloppe d'état. Cependant cette méthode est intrinsèquement non-robuste aux erreurs de modélisation et aux saturations en entrée. En outre, dans un contexte non-linéaire, l'obtention d'une garantie quantifiable du domaine de stabilité atteint reste à l'heure actuelle complexe. Contrairement aux approches classiques de la NDI, notre méthodologie peut être considérée comme un cadre de compensation non-linéaire généralisé qui permet d'intégrer les incertitudes et les saturations en entrée dans le processus de conception. En utilisant des stratégies de contrôle antiwindup, la loi de pilotage peut être calculée grâce à un simple processus en deux phases. Dans ce cadre de travail généralisé des transformations linéaires fractionnaires (LFT - Linear Fractional Transformations) de la boucle fermée non-linéaire peuvent être facilement déduites pour l'analyse de la stabilité robuste en utilisant des outils standards pour de systèmes linéaires. La méthode proposée est testée pour le pilotage d'un véhicule de rentrée atmosphérique de type aile delta lors de ses phases hypersonique, transsonique et subsonique. Pour cette thèse, un simulateur du vol incluant divers facteurs externes ainsi que des erreurs de modélisation a été développé dans Simulink.\n\nThis thesis work is devoted to extending Nonlinear Dynamic Inversion (NDI) for a large scale of\nnonlinear systems while guaranteeing sufficient stability conditions. NDI has been studied in a wide range of applications, including aeronautics and aerospace. It allows to compute nonlinear control laws able to decouple and linearize a model at any operating point of its state envelope. However, this method is inherently non-robust to modelling errors and input saturations. Moreover, obtaining a quantifiable guarantee of the attained stability domain in a nonlinear control context is not a very straightforward task. Unlike standard NDI approaches, our methodology can be viewed as a generalized nonlinear compensation framework which allows to incorporate uncertainties and input saturations in the design process. Paralleling anti-windup strategies, the controller can be computed through a single multichannel optimization problem or through a simple two-step process. Within this framework, linear fractional transformations of the nonlinear closed-loop can be easily derived for robust stability analysis using standard tools for linear systems. The proposed method is tested for the flight control of a delta wing type reentry vehicle at hypersonic, transonic and subsonic phases of the atmospheric reentry. For this thesis work, a Flight Mechanics simulator including diverse external factors and modelling errors was developed in Simulink."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|artemis___fr::630e47d8b572e3df0e91327d6d8f036d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"distributionlocation":"","hostedby":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://depozit.isae.fr/theses/2012/2012_Hernandez_Lopezomoza_Mario_Andres.pdf"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720142745,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http://ori-oai.isae.fr/ori-oai-repository/OAIHandler","datestamp":"2013-06-13","harvestDate":"2016-02-26T12:03:21.28Z","identifier":"oai:isae-repo.fr:isae-371","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:isae-repo.fr:isae-371"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Compensation non-linéaire généralisée"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Inversion dynamic non-linéaire"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande anti-windup"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande robuste"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Transformations linéaires fractionnaires"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande H-infinie non-lisse"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Generalized nonlinear compensation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Nonlinear dynamic inversion"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Anti-windup control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Robust control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Linear fractional transformation, Nonsmooth H-infinity control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Atmospheric reentry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"629.8"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cadre de travail généralisé de compensation non-linéaire robuste : application à la rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A generalized framework for robust nonlinear compensation : application to an atmospheric reentry control problem"}]} \ No newline at end of file From 2c126ed014b2e0b032cb5eb9359acfbb5655765c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Dec 2021 16:03:41 +0100 Subject: [PATCH 106/176] [BipFinder] create unresolved entities with measures at the level of the instance --- .../createunresolvedentities/PrepareBipFinder.java | 10 +++++++--- .../createunresolvedentities/SparkSaveUnresolved.java | 4 ++-- .../createunresolvedentities/PrepareTest.java | 8 +++++++- .../createunresolvedentities/ProduceTest.java | 6 ++++-- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index c7049430e..ab740a024 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -6,10 +6,12 @@ import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.UPDATE_CLASS_NAME; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.Arrays; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.oaf.Instance; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -39,7 +41,7 @@ public class PrepareBipFinder implements Serializable { private static final Logger log = LoggerFactory.getLogger(PrepareBipFinder.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( @@ -75,7 +77,7 @@ public class PrepareBipFinder implements Serializable { }); } - private static void prepareResults(SparkSession spark, String inputPath, String outputPath) { + private static void prepareResults(SparkSession spark, String inputPath, String outputPath) { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -95,7 +97,9 @@ public class PrepareBipFinder implements Serializable { Result r = new Result(); r.setId(DHPUtils.generateUnresolvedIdentifier(v.getId(), DOI)); - r.setMeasures(getMeasure(v)); + Instance inst = new Instance(); + inst.setMeasures(getMeasure(v)); + r.setInstance(Arrays.asList(inst)); return r; }, Encoders.bean(Result.class)) .write() diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java index 918f6a417..1a115e18f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java @@ -20,7 +20,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Result; public class SparkSaveUnresolved implements Serializable { - private static final Logger log = LoggerFactory.getLogger(PrepareFOSSparkJob.class); + private static final Logger log = LoggerFactory.getLogger(SparkSaveUnresolved.class); public static void main(String[] args) throws Exception { @@ -64,7 +64,7 @@ public class SparkSaveUnresolved implements Serializable { .map( (MapFunction) l -> OBJECT_MAPPER.readValue(l, Result.class), Encoders.bean(Result.class)) - .groupByKey((MapFunction) r -> r.getId(), Encoders.STRING()) + .groupByKey((MapFunction) Result::getId, Encoders.STRING()) .mapGroups((MapGroupsFunction) (k, it) -> { Result ret = it.next(); it.forEachRemaining(r -> ret.mergeFrom(r)); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index c48ccc8c2..eb9956d80 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -96,13 +96,15 @@ public class PrepareTest { String doi1 = "unresolved::10.0000/096020199389707::doi"; Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).count()); - Assertions.assertEquals(3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getMeasures().size()); + Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getInstance().size()); + Assertions.assertEquals(3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getInstance().get(0).getMeasures().size()); Assertions .assertEquals( "6.34596412687e-09", tmp .filter(r -> r.getId().equals(doi1)) .collect() .get(0) + .getInstance().get(0) .getMeasures() .stream() .filter(sl -> sl.getId().equals("influence")) @@ -117,6 +119,8 @@ public class PrepareTest { .filter(r -> r.getId().equals(doi1)) .collect() .get(0) + .getInstance() + .get(0) .getMeasures() .stream() .filter(sl -> sl.getId().equals("popularity_alt")) @@ -131,6 +135,8 @@ public class PrepareTest { .filter(r -> r.getId().equals(doi1)) .collect() .get(0) + .getInstance() + .get(0) .getMeasures() .stream() .filter(sl -> sl.getId().equals("popularity")) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index b77b5bb36..4522cbf07 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -126,6 +126,7 @@ public class ProduceTest { .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) .collect() .get(0) + .getInstance().get(0) .getMeasures() .size()); @@ -179,7 +180,8 @@ public class ProduceTest { List measures = tmp .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) - .flatMap(row -> row.getMeasures().iterator()) + .flatMap(row -> row.getInstance().iterator()) + .flatMap(inst -> inst.getMeasures().iterator()) .collect(); Assertions .assertEquals( @@ -226,7 +228,7 @@ public class ProduceTest { 85, tmp .filter(row -> !row.getId().equals("unresolved::10.3390/s18072310::doi")) - .filter(r -> r.getMeasures() != null) + .filter(r -> r.getInstance() != null) .count()); } From 813f856d3ffcb3d2b2d6afd9c18175ab6dd595b8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Dec 2021 16:11:12 +0100 Subject: [PATCH 107/176] [BipFinder] removing left over parameter in wf --- .../dhp/actionmanager/bipfinder/oozie_app/workflow.xml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml index 5b97a3d1a..fdf7129d6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml @@ -1,9 +1,5 @@ - - inputPath - the input path of the resources to be extended - bipScorePath @@ -107,8 +103,6 @@ - - - + \ No newline at end of file From 20ef1d657ff93682a376145bd66f0e0dab0ac81e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Dec 2021 16:26:36 +0100 Subject: [PATCH 108/176] refactoring --- .../createunresolvedentities/PrepareBipFinder.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index ab740a024..8d89d958d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -11,7 +11,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.oaf.Instance; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -30,6 +29,7 @@ import eu.dnetlib.dhp.actionmanager.bipmodel.BipScore; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Instance; import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Measure; import eu.dnetlib.dhp.schema.oaf.Result; @@ -41,7 +41,7 @@ public class PrepareBipFinder implements Serializable { private static final Logger log = LoggerFactory.getLogger(PrepareBipFinder.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( From 34ac56565d33bfd4781036593bd649c05a5385d8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Dec 2021 16:28:11 +0100 Subject: [PATCH 109/176] refactoring --- .../bipfinder/oozie_app/workflow.xml | 2 +- .../createunresolvedentities/PrepareTest.java | 15 +++++++++------ .../createunresolvedentities/ProduceTest.java | 5 +++-- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml index fdf7129d6..a6fba853d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml @@ -103,6 +103,6 @@ - + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index eb9956d80..125aa60fe 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -97,14 +97,17 @@ public class PrepareTest { Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).count()); Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getInstance().size()); - Assertions.assertEquals(3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getInstance().get(0).getMeasures().size()); + Assertions + .assertEquals( + 3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getInstance().get(0).getMeasures().size()); Assertions .assertEquals( "6.34596412687e-09", tmp .filter(r -> r.getId().equals(doi1)) .collect() .get(0) - .getInstance().get(0) + .getInstance() + .get(0) .getMeasures() .stream() .filter(sl -> sl.getId().equals("influence")) @@ -119,8 +122,8 @@ public class PrepareTest { .filter(r -> r.getId().equals(doi1)) .collect() .get(0) - .getInstance() - .get(0) + .getInstance() + .get(0) .getMeasures() .stream() .filter(sl -> sl.getId().equals("popularity_alt")) @@ -135,8 +138,8 @@ public class PrepareTest { .filter(r -> r.getId().equals(doi1)) .collect() .get(0) - .getInstance() - .get(0) + .getInstance() + .get(0) .getMeasures() .stream() .filter(sl -> sl.getId().equals("popularity")) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index 4522cbf07..36417f614 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -126,7 +126,8 @@ public class ProduceTest { .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) .collect() .get(0) - .getInstance().get(0) + .getInstance() + .get(0) .getMeasures() .size()); @@ -181,7 +182,7 @@ public class ProduceTest { List measures = tmp .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) .flatMap(row -> row.getInstance().iterator()) - .flatMap(inst -> inst.getMeasures().iterator()) + .flatMap(inst -> inst.getMeasures().iterator()) .collect(); Assertions .assertEquals( From de6c4c896866b7d4af41a63e18afb69333ce42f6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Dec 2021 16:44:44 +0100 Subject: [PATCH 110/176] [FOS]creation of the unresolved entities: remove the split for the doi: no more needed since each row is related to one doi --- .../{bipmodel => }/Constants.java | 37 ++++++++++++- .../bipfinder/SparkAtomicActionScoreJob.java | 2 +- .../PrepareBipFinder.java | 4 +- .../PrepareFOSSparkJob.java | 52 ++----------------- .../SparkSaveUnresolved.java | 2 +- 5 files changed, 45 insertions(+), 52 deletions(-) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/{bipmodel => }/Constants.java (58%) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java similarity index 58% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/Constants.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java index 131d5fa07..7a3814b71 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java @@ -1,8 +1,11 @@ -package eu.dnetlib.dhp.actionmanager.bipmodel; +package eu.dnetlib.dhp.actionmanager; import java.util.Optional; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -46,4 +49,36 @@ public class Constants { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } + public static StructuredProperty getSubject(String sbj, String classid, String classname) { + if (sbj.equals(NULL)) + return null; + StructuredProperty sp = new StructuredProperty(); + sp.setValue(sbj); + sp + .setQualifier( + OafMapperUtils + .qualifier(classid + , + classname, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES)); + sp + .setDataInfo( + OafMapperUtils + .dataInfo( + false, + UPDATE_DATA_INFO_TYPE, + true, + false, + OafMapperUtils + .qualifier( + UPDATE_SUBJECT_FOS_CLASS_ID, + UPDATE_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + "")); + + return sp; + + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index fafa209ea..ddf5f4adf 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; -import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.*; +import static eu.dnetlib.dhp.actionmanager.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index 8d89d958d..e9c9f0350 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -1,8 +1,8 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.*; -import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.UPDATE_CLASS_NAME; +import static eu.dnetlib.dhp.actionmanager.Constants.*; +import static eu.dnetlib.dhp.actionmanager.Constants.UPDATE_CLASS_NAME; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 558f51f3f..1a6e9ddfc 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.*; +import static eu.dnetlib.dhp.actionmanager.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; @@ -10,7 +10,6 @@ import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -21,10 +20,8 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; public class PrepareFOSSparkJob implements Serializable { @@ -67,17 +64,7 @@ public class PrepareFOSSparkJob implements Serializable { private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) { Dataset fosDataset = readPath(spark, sourcePath, FOSDataModel.class); - fosDataset.flatMap((FlatMapFunction) v -> { - List fosList = new ArrayList<>(); - final String level1 = v.getLevel1(); - final String level2 = v.getLevel2(); - final String level3 = v.getLevel3(); - Arrays - .stream(v.getDoi().split("\u0002")) - .forEach(d -> fosList.add(FOSDataModel.newInstance(d, level1, level2, level3))); - return fosList.iterator(); - }, Encoders.bean(FOSDataModel.class)) - .map((MapFunction) value -> { + fosDataset.map((MapFunction) value -> { Result r = new Result(); r.setId(DHPUtils.generateUnresolvedIdentifier(value.getDoi(), DOI)); r.setSubject(getSubjects(value)); @@ -91,43 +78,14 @@ public class PrepareFOSSparkJob implements Serializable { private static List getSubjects(FOSDataModel fos) { return Arrays - .asList(getSubject(fos.getLevel1()), getSubject(fos.getLevel2()), getSubject(fos.getLevel3())) + .asList(getSubject(fos.getLevel1(), FOS_CLASS_ID, FOS_CLASS_NAME), + getSubject(fos.getLevel2(), FOS_CLASS_ID, FOS_CLASS_NAME), + getSubject(fos.getLevel3(), FOS_CLASS_ID, FOS_CLASS_NAME)) .stream() .filter(Objects::nonNull) .collect(Collectors.toList()); } - private static StructuredProperty getSubject(String sbj) { - if (sbj.equals(NULL)) - return null; - StructuredProperty sp = new StructuredProperty(); - sp.setValue(sbj); - sp - .setQualifier( - OafMapperUtils - .qualifier( - FOS_CLASS_ID, - FOS_CLASS_NAME, - ModelConstants.DNET_SUBJECT_TYPOLOGIES, - ModelConstants.DNET_SUBJECT_TYPOLOGIES)); - sp - .setDataInfo( - OafMapperUtils - .dataInfo( - false, - UPDATE_DATA_INFO_TYPE, - true, - false, - OafMapperUtils - .qualifier( - UPDATE_SUBJECT_FOS_CLASS_ID, - UPDATE_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS, - ModelConstants.DNET_PROVENANCE_ACTIONS), - "")); - return sp; - - } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java index 1a115e18f..ab8356836 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.bipmodel.Constants.*; +import static eu.dnetlib.dhp.actionmanager.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; From b81efb6a9d0c8dac4b4cc551e7574878b1e377f8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Dec 2021 21:40:35 +0100 Subject: [PATCH 111/176] [FOS]changed the mapping between the csv and the model. Changed Test classes and resources --- .../dnetlib/dhp/actionmanager/Constants.java | 46 +++++------ .../createunresolvedentities/GetFOSData.java | 2 +- .../PrepareFOSSparkJob.java | 35 +++++---- .../model/FOSDataModel.java | 8 +- .../createunresolvedentities/PrepareTest.java | 6 +- .../createunresolvedentities/ProduceTest.java | 25 ++++-- .../createunresolvedentities/fos/fos.json | 77 ++++++++++--------- .../createunresolvedentities/fos/fos_sbs.csv | 39 ++++++++++ .../fos/h2020_fos_sbs.csv | 38 --------- 9 files changed, 148 insertions(+), 128 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.csv delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java index 7a3814b71..153a98d3f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java @@ -3,9 +3,6 @@ package eu.dnetlib.dhp.actionmanager; import java.util.Optional; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -14,6 +11,9 @@ import org.apache.spark.sql.SparkSession; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class Constants { @@ -55,28 +55,28 @@ public class Constants { StructuredProperty sp = new StructuredProperty(); sp.setValue(sbj); sp - .setQualifier( - OafMapperUtils - .qualifier(classid - , - classname, - ModelConstants.DNET_SUBJECT_TYPOLOGIES, - ModelConstants.DNET_SUBJECT_TYPOLOGIES)); + .setQualifier( + OafMapperUtils + .qualifier( + classid, + classname, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES)); sp - .setDataInfo( + .setDataInfo( + OafMapperUtils + .dataInfo( + false, + UPDATE_DATA_INFO_TYPE, + true, + false, OafMapperUtils - .dataInfo( - false, - UPDATE_DATA_INFO_TYPE, - true, - false, - OafMapperUtils - .qualifier( - UPDATE_SUBJECT_FOS_CLASS_ID, - UPDATE_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS, - ModelConstants.DNET_PROVENANCE_ACTIONS), - "")); + .qualifier( + UPDATE_SUBJECT_FOS_CLASS_ID, + UPDATE_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + "")); return sp; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java index 9dec3e862..bd430d940 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java @@ -21,7 +21,7 @@ public class GetFOSData implements Serializable { private static final Logger log = LoggerFactory.getLogger(GetFOSData.class); - public static final char DEFAULT_DELIMITER = '\t'; + public static final char DEFAULT_DELIMITER = ','; public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 1a6e9ddfc..8aadaf98e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -11,6 +11,7 @@ import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -64,26 +65,32 @@ public class PrepareFOSSparkJob implements Serializable { private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) { Dataset fosDataset = readPath(spark, sourcePath, FOSDataModel.class); - fosDataset.map((MapFunction) value -> { - Result r = new Result(); - r.setId(DHPUtils.generateUnresolvedIdentifier(value.getDoi(), DOI)); - r.setSubject(getSubjects(value)); - return r; - }, Encoders.bean(Result.class)) + fosDataset.groupByKey((MapFunction)v->v.getDoi(), Encoders.STRING()) + .mapGroups((MapGroupsFunction)(k,it)->{ + Result r = new Result(); + FOSDataModel first = it.next(); + r.setId(DHPUtils.generateUnresolvedIdentifier(first.getDoi(), DOI)); + HashSet level1 = new HashSet<>(); + HashSet level2 = new HashSet<>(); + HashSet level3 = new HashSet<>(); + addLevels(level1, level2, level3, first); + it.forEachRemaining(v -> addLevels(level1, level2, level3, v)); + Listsbjs = new ArrayList<>(); + level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); + level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); + level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); + return r; + }, Encoders.bean(Result.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath + "/fos"); } - private static List getSubjects(FOSDataModel fos) { - return Arrays - .asList(getSubject(fos.getLevel1(), FOS_CLASS_ID, FOS_CLASS_NAME), - getSubject(fos.getLevel2(), FOS_CLASS_ID, FOS_CLASS_NAME), - getSubject(fos.getLevel3(), FOS_CLASS_ID, FOS_CLASS_NAME)) - .stream() - .filter(Objects::nonNull) - .collect(Collectors.toList()); + private static void addLevels(HashSet level1, HashSet level2, HashSet level3, FOSDataModel first) { + level1.add(first.getLevel1()); + level2.add(first.getLevel2()); + level3.add(first.getLevel3()); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java index befb230cb..e98ba74a1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java @@ -6,19 +6,19 @@ import java.io.Serializable; import com.opencsv.bean.CsvBindByPosition; public class FOSDataModel implements Serializable { - @CsvBindByPosition(position = 1) + @CsvBindByPosition(position = 0) // @CsvBindByName(column = "doi") private String doi; - @CsvBindByPosition(position = 2) + @CsvBindByPosition(position = 1) // @CsvBindByName(column = "level1") private String level1; - @CsvBindByPosition(position = 3) + @CsvBindByPosition(position = 2) // @CsvBindByName(column = "level2") private String level2; - @CsvBindByPosition(position = 4) + @CsvBindByPosition(position = 3) // @CsvBindByName(column = "level3") private String level3; diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index 125aa60fe..67a0f3465 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -155,14 +155,14 @@ public class PrepareTest { void getFOSFileTest() throws IOException, ClassNotFoundException { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv") + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.csv") .getPath(); final String outputPath = workingDir.toString() + "/fos.json"; new GetFOSData() .doRewrite( sourcePath, outputPath, "eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel", - '\t', fs); + ',', fs); BufferedReader in = new BufferedReader( new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath)))); @@ -176,7 +176,7 @@ public class PrepareTest { count += 1; } - assertEquals(38, count); + assertEquals(39, count); } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index 36417f614..8635dcfb8 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -107,13 +107,13 @@ public class ProduceTest { .textFile(workingDir.toString() + "/unresolved") .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - Assertions.assertEquals(135, tmp.count()); + Assertions.assertEquals(105, tmp.count()); Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")).count()); Assertions .assertEquals( - 3, tmp + 6, tmp .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) .collect() .get(0) @@ -175,9 +175,20 @@ public class ProduceTest { ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemename())); - sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("engineering and technology")); - sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("nano-technology")); - sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("nanoscience & nanotechnology")); + Assertions + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences"))); + Assertions.assertEquals(false, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("nano-technology"))); + Assertions + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("040502 food science"))); + + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0303 health sciences"))); + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics"))); List measures = tmp .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) @@ -219,7 +230,7 @@ public class ProduceTest { Assertions .assertEquals( - 49, tmp + 19, tmp .filter(row -> !row.getId().equals("unresolved::10.3390/s18072310::doi")) .filter(row -> row.getSubject() != null) .count()); @@ -229,7 +240,7 @@ public class ProduceTest { 85, tmp .filter(row -> !row.getId().equals("unresolved::10.3390/s18072310::doi")) - .filter(r -> r.getInstance() != null) + .filter(r -> r.getInstance() != null && r.getInstance().size() > 0) .count()); } diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json index 1b46a3d25..a8221324f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json @@ -1,38 +1,39 @@ -{"doi":"10.3390/s18072310","level1":"engineering and technology","level2":"nano-technology","level3":"nanoscience & nanotechnology"} -{"doi":"10.1111/1365-2656.12831\u000210.17863/cam.24369","level1":"social sciences","level2":"psychology and cognitive sciences","level3":"NULL"} -{"doi":"10.3929/ethz-b-000187584\u000210.1002/chem.201701644","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"10.1080/01913123.2017.1367361","level1":"medical and health sciences","level2":"clinical medicine","level3":"oncology & carcinogenesis"} -{"doi":"10.1051/e3sconf/20199207011","level1":"natural sciences","level2":"earth and related environmental sciences","level3":"environmental sciences"} -{"doi":"10.1038/onc.2015.333","level1":"medical and health sciences","level2":"clinical medicine","level3":"oncology & carcinogenesis"} -{"doi":"10.1093/mnras/staa256","level1":"natural sciences","level2":"physical sciences","level3":"NULL"} -{"doi":"10.1016/j.jclepro.2018.07.166","level1":"engineering and technology","level2":"other engineering and technologies","level3":"building & construction"} -{"doi":"10.1103/physrevlett.125.037403","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} -{"doi":"10.1080/03602532.2017.1316285","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"10.1001/jamanetworkopen.2019.1868","level1":"medical and health sciences","level2":"other medical science","level3":"health policy & services"} -{"doi":"10.1128/mra.00874-18","level1":"natural sciences","level2":"biological sciences","level3":"plant biology & botany"} -{"doi":"10.1016/j.nancom.2018.03.001","level1":"engineering and technology","level2":"NULL","level3":"NULL"} -{"doi":"10.1112/topo.12174","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"10.12688/wellcomeopenres.15846.1","level1":"medical and health sciences","level2":"health sciences","level3":"NULL"} -{"doi":"10.21468/scipostphys.3.1.001","level1":"natural sciences","level2":"physical sciences","level3":"NULL"} -{"doi":"10.1088/1741-4326/ab6c77","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} -{"doi":"10.1109/tpwrs.2019.2944747","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"electrical & electronic engineering"} -{"doi":"10.1016/j.expthermflusci.2019.109994\u000210.17863/cam.46212","level1":"engineering and technology","level2":"mechanical engineering","level3":"mechanical engineering & transports"} -{"doi":"10.1109/tc.2018.2860012","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"computer hardware & architecture"} -{"doi":"10.1002/mma.6622","level1":"natural sciences","level2":"mathematics","level3":"numerical & computational mathematics"} -{"doi":"10.1051/radiopro/2020020","level1":"natural sciences","level2":"chemical sciences","level3":"NULL"} -{"doi":"10.1007/s12268-019-1003-4","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} -{"doi":"10.3390/cancers12010236","level1":"medical and health sciences","level2":"health sciences","level3":"biochemistry & molecular biology"} -{"doi":"10.6084/m9.figshare.9912614\u000210.6084/m9.figshare.9912614.v1\u000210.1080/00268976.2019.1665199","level1":"natural sciences","level2":"chemical sciences","level3":"physical chemistry"} -{"doi":"10.1175/jpo-d-17-0239.1","level1":"natural sciences","level2":"biological sciences","level3":"marine biology & hydrobiology"} -{"doi":"10.1007/s13218-020-00674-7","level1":"engineering and technology","level2":"industrial biotechnology","level3":"industrial engineering & automation"} -{"doi":"10.1016/j.psyneuen.2016.02.003\u000210.1016/j.psyneuen.2016.02.00310.7892/boris.78886\u000210.7892/boris.78886","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} -{"doi":"10.1109/ted.2018.2813542","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"electrical & electronic engineering"} -{"doi":"10.3989/scimar.04739.25a","level1":"natural sciences","level2":"biological sciences","level3":"NULL"} -{"doi":"10.3390/su12187503","level1":"natural sciences","level2":"earth and related environmental sciences","level3":"NULL"} -{"doi":"10.1016/j.ccell.2018.08.017","level1":"medical and health sciences","level2":"basic medicine","level3":"biochemistry & molecular biology"} -{"doi":"10.1103/physrevresearch.2.023322","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} -{"doi":"10.1039/c8cp03234c","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"10.5281/zenodo.3696557\u000210.5281/zenodo.3696556\u000210.1109/jsac.2016.2545384","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"networking & telecommunications"} -{"doi":"10.1038/ng.3667\u000210.1038/ng.3667.\u000210.17615/tct6-4m26\u000210.17863/cam.15649","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"} -{"doi":"10.1016/j.jclepro.2019.119065","level1":"engineering and technology","level2":"other engineering and technologies","level3":"building & construction"} -{"doi":"10.1111/pce.13392","level1":"agricultural and veterinary sciences","level2":"agriculture, forestry, and fisheries","level3":"agronomy & agriculture"} \ No newline at end of file +{"doi":"10.1080/1536383x.2020.1868997","level1":"02 engineering and technology","level2":"0210 nano-technology","level3":"021001 nanoscience & nanotechnology"} +{"doi":"10.1080/1536383x.2020.1868997","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010402 general chemistry"} +{"doi":"10.1186/s40425-019-0732-8","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030220 oncology & carcinogenesis"} +{"doi":"10.1186/s40425-019-0732-8","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030304 developmental biology"} +{"doi":"10.1007/s10482-021-01529-3","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030304 developmental biology"} +{"doi":"10.1007/s10482-021-01529-3","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030306 microbiology"} +{"doi":"10.1155/2021/6643273","level1":"01 natural sciences","level2":"0103 physical sciences","level3":"010301 acoustics"} +{"doi":"10.1155/2021/6643273","level1":"02 engineering and technology","level2":"0209 industrial biotechnology","level3":"020901 industrial engineering & automation"} +{"doi":"10.12737/article_5d6613dbf2ad51.82646096","level1":"02 engineering and technology","level2":"0210 nano-technology","level3":"021001 nanoscience & nanotechnology"} +{"doi":"10.12737/article_5d6613dbf2ad51.82646096","level1":"01 natural sciences","level2":"0103 physical sciences","level3":"010302 applied physics"} +{"doi":"10.1216/jie.2020.32.457","level1":"01 natural sciences","level2":"0101 mathematics","level3":"010101 applied mathematics"} +{"doi":"10.1216/jie.2020.32.457","level1":"01 natural sciences","level2":"0101 mathematics","level3":"010102 general mathematics"} +{"doi":"10.3934/naco.2021021","level1":"02 engineering and technology","level2":"0211 other engineering and technologies","level3":"021103 operations research"} +{"doi":"10.3934/naco.2021021","level1":"02 engineering and technology","level2":"0209 industrial biotechnology","level3":"020901 industrial engineering & automation"} +{"doi":"10.1080/1034912x.2021.1910933","level1":"05 social sciences","level2":"050301 education","level3":"050301 education"} +{"doi":"10.1080/1034912x.2021.1910933","level1":"05 social sciences","level2":"0501 psychology and cognitive sciences","level3":"050104 developmental & child psychology"} +{"doi":"10.1016/j.rtbm.2020.100596","level1":"05 social sciences","level2":"0502 economics and business","level3":"050211 marketing"} +{"doi":"10.1016/j.rtbm.2020.100596","level1":"05 social sciences","level2":"0502 economics and business","level3":"050212 sport, leisure & tourism"} +{"doi":"10.14807/ijmp.v11i8.1220","level1":"05 social sciences","level2":"0502 economics and business","level3":"050211 marketing"} +{"doi":"10.14807/ijmp.v11i8.1220","level1":"05 social sciences","level2":"0502 economics and business","level3":"050203 business & management"} +{"doi":"10.1007/s13205-020-02415-x","level1":"03 medical and health sciences","level2":"0303 health sciences","level3":"030304 developmental biology"} +{"doi":"10.1007/s13205-020-02415-x","level1":"03 medical and health sciences","level2":"0303 health sciences","level3":"030302 biochemistry & molecular biology"} +{"doi":"10.3390/s18072310","level1":"04 agricultural and veterinary sciences","level2":"0404 agricultural biotechnology","level3":"040502 food science"} +{"doi":"10.3390/s18072310","level1":"03 medical and health sciences","level2":"0303 health sciences","level3":"030309 nutrition & dietetics"} +{"doi":"10.1063/5.0032658","level1":"01 natural sciences","level2":"0103 physical sciences","level3":"010304 chemical physics"} +{"doi":"10.1063/5.0032658","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010402 general chemistry"} +{"doi":"10.1145/3411174.3411195","level1":"02 engineering and technology","level2":"0202 electrical engineering, electronic engineering, information engineering","level3":"020201 artificial intelligence & image processing"} +{"doi":"10.1145/3411174.3411195","level1":"02 engineering and technology","level2":"0202 electrical engineering, electronic engineering, information engineering","level3":"020206 networking & telecommunications"} +{"doi":"10.1021/acs.joc.0c02755","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010405 organic chemistry"} +{"doi":"10.1021/acs.joc.0c02755","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010402 general chemistry"} +{"doi":"10.1002/jcp.28608","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030220 oncology & carcinogenesis"} +{"doi":"10.1002/jcp.28608","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030304 developmental biology"} +{"doi":"10.1097/cmr.0000000000000579","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030220 oncology & carcinogenesis"} +{"doi":"10.1097/cmr.0000000000000579","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030304 developmental biology"} +{"doi":"10.1007/s11164-020-04383-6","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010405 organic chemistry"} +{"doi":"10.1007/s11164-020-04383-6","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010402 general chemistry"} +{"doi":"10.1016/j.actpsy.2020.103155","level1":"05 social sciences","level2":"0501 psychology and cognitive sciences","level3":"050105 experimental psychology"} +{"doi":"10.1016/j.actpsy.2020.103155","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030217 neurology & neurosurgery"} +{"doi":"10.1109/memea49120.2020.9137187","level1":"02 engineering and technology","level2":"0202 electrical engineering, electronic engineering, information engineering","level3":"020208 electrical & electronic engineering"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.csv b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.csv new file mode 100644 index 000000000..c5a2a821a --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.csv @@ -0,0 +1,39 @@ +10.1080/1536383x.2020.1868997,02 engineering and technology,0210 nano-technology,021001 nanoscience & nanotechnology +10.1080/1536383x.2020.1868997,01 natural sciences,0104 chemical sciences,010402 general chemistry +10.1186/s40425-019-0732-8,03 medical and health sciences,0302 clinical medicine,030220 oncology & carcinogenesis +10.1186/s40425-019-0732-8,03 medical and health sciences,0301 basic medicine,030304 developmental biology +10.1007/s10482-021-01529-3,03 medical and health sciences,0301 basic medicine,030304 developmental biology +10.1007/s10482-021-01529-3,03 medical and health sciences,0301 basic medicine,030306 microbiology +10.1155/2021/6643273,01 natural sciences,0103 physical sciences,010301 acoustics +10.1155/2021/6643273,02 engineering and technology,0209 industrial biotechnology,020901 industrial engineering & automation +10.12737/article_5d6613dbf2ad51.82646096,02 engineering and technology,0210 nano-technology,021001 nanoscience & nanotechnology +10.12737/article_5d6613dbf2ad51.82646096,01 natural sciences,0103 physical sciences,010302 applied physics +10.1216/jie.2020.32.457,01 natural sciences,0101 mathematics,010101 applied mathematics +10.1216/jie.2020.32.457,01 natural sciences,0101 mathematics,010102 general mathematics +10.3934/naco.2021021,02 engineering and technology,0211 other engineering and technologies,021103 operations research +10.3934/naco.2021021,02 engineering and technology,0209 industrial biotechnology,020901 industrial engineering & automation +10.1080/1034912x.2021.1910933,05 social sciences,050301 education,050301 education +10.1080/1034912x.2021.1910933,05 social sciences,0501 psychology and cognitive sciences,050104 developmental & child psychology +10.1016/j.rtbm.2020.100596,05 social sciences,0502 economics and business,050211 marketing +10.1016/j.rtbm.2020.100596,05 social sciences,0502 economics and business,"050212 sport, leisure & tourism" +10.14807/ijmp.v11i8.1220,05 social sciences,0502 economics and business,050211 marketing +10.14807/ijmp.v11i8.1220,05 social sciences,0502 economics and business,050203 business & management +10.1007/s13205-020-02415-x,03 medical and health sciences,0303 health sciences,030304 developmental biology +10.1007/s13205-020-02415-x,03 medical and health sciences,0303 health sciences,030302 biochemistry & molecular biology +10.3390/foods10040865,04 agricultural and veterinary sciences,0404 agricultural biotechnology,040502 food science +10.3390/foods10040865,03 medical and health sciences,0303 health sciences,030309 nutrition & dietetics +10.1063/5.0032658,01 natural sciences,0103 physical sciences,010304 chemical physics +10.1063/5.0032658,01 natural sciences,0104 chemical sciences,010402 general chemistry +10.1145/3411174.3411195,02 engineering and technology,"0202 electrical engineering, electronic engineering, information engineering",020201 artificial intelligence & image processing +10.1145/3411174.3411195,02 engineering and technology,"0202 electrical engineering, electronic engineering, information engineering",020206 networking & telecommunications +10.1021/acs.joc.0c02755,01 natural sciences,0104 chemical sciences,010405 organic chemistry +10.1021/acs.joc.0c02755,01 natural sciences,0104 chemical sciences,010402 general chemistry +10.1002/jcp.28608,03 medical and health sciences,0302 clinical medicine,030220 oncology & carcinogenesis +10.1002/jcp.28608,03 medical and health sciences,0301 basic medicine,030304 developmental biology +10.1097/cmr.0000000000000579,03 medical and health sciences,0302 clinical medicine,030220 oncology & carcinogenesis +10.1097/cmr.0000000000000579,03 medical and health sciences,0301 basic medicine,030304 developmental biology +10.1007/s11164-020-04383-6,01 natural sciences,0104 chemical sciences,010405 organic chemistry +10.1007/s11164-020-04383-6,01 natural sciences,0104 chemical sciences,010402 general chemistry +10.1016/j.actpsy.2020.103155,05 social sciences,0501 psychology and cognitive sciences,050105 experimental psychology +10.1016/j.actpsy.2020.103155,03 medical and health sciences,0302 clinical medicine,030217 neurology & neurosurgery +10.1109/memea49120.2020.9137187,02 engineering and technology,"0202 electrical engineering, electronic engineering, information engineering",020208 electrical & electronic engineering \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv deleted file mode 100644 index e874353e8..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv +++ /dev/null @@ -1,38 +0,0 @@ -dedup_wf_001::ddcc7a56fa13e49bcc59c6bdd19ad26c 10.3390/s18072310 engineering and technology nano-technology nanoscience & nanotechnology -dedup_wf_001::b76062d56e28224eac56111a4e1e5ecf 10.1111/1365-2656.1283110.17863/cam.24369 social sciences psychology and cognitive sciences NULL -dedup_wf_001::bb752acb8f403a25fa7851a302f7b7ac 10.3929/ethz-b-00018758410.1002/chem.201701644 natural sciences NULL NULL -dedup_wf_001::2f1435a9201ecf5cbbcb12c9b2d971cd 10.1080/01913123.2017.1367361 medical and health sciences clinical medicine oncology & carcinogenesis -dedup_wf_001::fc9e47ec16c67b101724320d4b030514 10.1051/e3sconf/20199207011 natural sciences earth and related environmental sciences environmental sciences -dedup_wf_001::caa1e5b4de387cb31751552f4f0f5d72 10.1038/onc.2015.333 medical and health sciences clinical medicine oncology & carcinogenesis -dedup_wf_001::c2a98df5637d69bf0524eaf40fe6bf11 10.1093/mnras/staa256 natural sciences physical sciences NULL -dedup_wf_001::c221262bdc77cbfd59859a402f0e3991 10.1016/j.jclepro.2018.07.166 engineering and technology other engineering and technologies building & construction -doiboost____::d56d9dc21f317b3e009d5b6c8ea87212 10.1103/physrevlett.125.037403 natural sciences physical sciences nuclear & particles physics -dedup_wf_001::8a7269c8ee6470b2fb4fd384bc389e08 10.1080/03602532.2017.1316285 natural sciences NULL NULL -dedup_wf_001::28342ebbc19833e4e1f4a2b23cf5ee20 10.1001/jamanetworkopen.2019.1868 medical and health sciences other medical science health policy & services -dedup_wf_001::c1e1daf2b55dd9ec8e1c7c7458bbc7bc 10.1128/mra.00874-18 natural sciences biological sciences plant biology & botany -dedup_wf_001::a2ef4a2720c71907180750e5871298ef 10.1016/j.nancom.2018.03.001 engineering and technology NULL NULL -dedup_wf_001::676f46a31519e83a89efcb1c626286fb 10.1112/topo.12174 natural sciences NULL NULL -dedup_wf_001::6f2761642f1e39313388e2c4060657dd 10.12688/wellcomeopenres.15846.1 medical and health sciences health sciences NULL -dedup_wf_001::e414c1dec599521a9635a60de0f6755b 10.21468/scipostphys.3.1.001 natural sciences physical sciences NULL -dedup_wf_001::f3395fe0f330164ea424dc61c86c9a3d 10.1088/1741-4326/ab6c77 natural sciences physical sciences nuclear & particles physics -dedup_wf_001::a4f32a97a783117012f1de11797e73f2 10.1109/tpwrs.2019.2944747 engineering and technology electrical engineering, electronic engineering, information engineering electrical & electronic engineering -dedup_wf_001::313ae1cd083ae1696d12dd1909f97df8 10.1016/j.expthermflusci.2019.10999410.17863/cam.46212 engineering and technology mechanical engineering mechanical engineering & transports -dedup_wf_001::2a300a7d3ca7347791ebcef986bc0682 10.1109/tc.2018.2860012 engineering and technology electrical engineering, electronic engineering, information engineering computer hardware & architecture -doiboost____::5b79bd7bd9f87361b4a4abc3cbb2df75 10.1002/mma.6622 natural sciences mathematics numerical & computational mathematics -dedup_wf_001::6a3f61f217a2519fbaddea1094e3bfc2 10.1051/radiopro/2020020 natural sciences chemical sciences NULL -dedup_wf_001::a3f0430309a639f4234a0e57b10f2dee 10.1007/s12268-019-1003-4 medical and health sciences basic medicine NULL -dedup_wf_001::b6b8a3a1cccbee459cf3343485efdb12 10.3390/cancers12010236 medical and health sciences health sciences biochemistry & molecular biology -dedup_wf_001::dd06ee7974730e7b09a4f03c83b3f9bd 10.6084/m9.figshare.991261410.6084/m9.figshare.9912614.v110.1080/00268976.2019.1665199 natural sciences chemical sciences physical chemistry -dedup_wf_001::027c78bef6f972b5e26dfea55d30fbe3 10.1175/jpo-d-17-0239.1 natural sciences biological sciences marine biology & hydrobiology -dedup_wf_001::43edc179aa9e1fbaf582c5203b18b519 10.1007/s13218-020-00674-7 engineering and technology industrial biotechnology industrial engineering & automation -dedup_wf_001::e7770e11cd6eb514bb52c07b5a8a80f0 10.1016/j.psyneuen.2016.02.00310.1016/j.psyneuen.2016.02.00310.7892/boris.7888610.7892/boris.78886 medical and health sciences basic medicine NULL -dedup_wf_001::80bc15d69bdc589149631f3439dde5aa 10.1109/ted.2018.2813542 engineering and technology electrical engineering, electronic engineering, information engineering electrical & electronic engineering -dedup_wf_001::42c1cfa33e7872944b920cff90f4d99e 10.3989/scimar.04739.25a natural sciences biological sciences NULL -dedup_wf_001::9bacdbbaa9da3658b7243d5de8e3ce14 10.3390/su12187503 natural sciences earth and related environmental sciences NULL -dedup_wf_001::59e43d3527dcfecb6097fbd5740c8950 10.1016/j.ccell.2018.08.017 medical and health sciences basic medicine biochemistry & molecular biology -doiboost____::e024d1b738df3b24bc58fa0228542571 10.1103/physrevresearch.2.023322 natural sciences physical sciences nuclear & particles physics -dedup_wf_001::66e9a3237fa8178886d26d3c2d5b9e66 10.1039/c8cp03234c natural sciences NULL NULL -dedup_wf_001::83737ab4205bae751571bb3b166efa18 10.5281/zenodo.369655710.5281/zenodo.369655610.1109/jsac.2016.2545384 engineering and technology electrical engineering, electronic engineering, information engineering networking & telecommunications -dedup_wf_001::e3f892db413a689e572dd256acad55fe 10.1038/ng.366710.1038/ng.3667.10.17615/tct6-4m2610.17863/cam.15649 medical and health sciences health sciences genetics & heredity -dedup_wf_001::14ba594e8fd081847bc3f50f56335003 10.1016/j.jclepro.2019.119065 engineering and technology other engineering and technologies building & construction -dedup_wf_001::08ac7b33a41bcea2d055ecd8585d632e 10.1111/pce.13392 agricultural and veterinary sciences agriculture, forestry, and fisheries agronomy & agriculture \ No newline at end of file From 6116fc5d408b2381e0006060a2ba55b2ceb53114 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Dec 2021 23:04:22 +0100 Subject: [PATCH 112/176] [FOS]added logic to include only different subjects. Test refactoring and extention --- .../PrepareFOSSparkJob.java | 1 + .../createunresolvedentities/ProduceTest.java | 389 +++++++++++------- 2 files changed, 251 insertions(+), 139 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 8aadaf98e..d4a02c2ff 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -79,6 +79,7 @@ public class PrepareFOSSparkJob implements Serializable { level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); + r.setSubject(sbjs); return r; }, Encoders.bean(Result.class)) .write() diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index 8635dcfb8..b1ffeee17 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -67,8 +67,216 @@ public class ProduceTest { } @Test - void produceTest() throws Exception { + void produceTestSubjects()throws Exception{ + JavaRDD tmp = getResultJavaRDD(); + + List sbjs = tmp + .filter(row -> row.getSubject()!= null && row.getSubject().size()>0) + .flatMap(row -> row.getSubject().iterator()) + .collect(); + + sbjs.forEach(sbj -> Assertions.assertEquals("FOS", sbj.getQualifier().getClassid())); + sbjs + .forEach( + sbj -> Assertions + .assertEquals( + "Fields of Science and Technology classification", sbj.getQualifier().getClassname())); + sbjs + .forEach( + sbj -> Assertions + .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid())); + sbjs + .forEach( + sbj -> Assertions + .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename())); + + sbjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference())); + sbjs.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred())); + sbjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible())); + sbjs.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust())); + sbjs.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance())); + sbjs + .forEach( + sbj -> Assertions.assertEquals("subject:fos", sbj.getDataInfo().getProvenanceaction().getClassid())); + sbjs + .forEach( + sbj -> Assertions + .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); + sbjs + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); + sbjs + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, + sbj.getDataInfo().getProvenanceaction().getSchemename())); + } + + @Test + void produceTestMeasuress()throws Exception{ + + JavaRDD tmp = getResultJavaRDD(); + + List mes = tmp + .filter(row -> row.getInstance()!= null && row.getInstance().size()>0) + .flatMap(row -> row.getInstance().iterator()) + .flatMap(i->i.getMeasures().iterator()) + .flatMap(m ->m.getUnit().iterator()) + .collect(); + + + + mes.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference())); + mes.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred())); + mes.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible())); + mes.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust())); + mes.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance())); + mes + .forEach( + sbj -> Assertions.assertEquals("measure:bip", sbj.getDataInfo().getProvenanceaction().getClassid())); + mes + .forEach( + sbj -> Assertions + .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); + mes + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); + mes + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, + sbj.getDataInfo().getProvenanceaction().getSchemename())); + } + @Test + void produceTest6Subjects() throws Exception{ + final String doi = "unresolved::10.3390/s18072310::doi"; + + JavaRDD tmp = getResultJavaRDD(); + + Assertions + .assertEquals( + 6, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getSubject() + .size()); + + List sbjs = tmp + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getSubject().iterator()) + .collect(); + + + Assertions + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences"))); + + Assertions + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("040502 food science"))); + + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0303 health sciences"))); + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics"))); + + + } + + @Test + void produceTest3Measures()throws Exception{ + final String doi = "unresolved::10.3390/s18072310::doi"; + JavaRDD tmp = getResultJavaRDD(); + + Assertions + .assertEquals( + 3, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getInstance() + .get(0) + .getMeasures() + .size()); + + + List measures = tmp + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getInstance().iterator()) + .flatMap(inst -> inst.getMeasures().iterator()) + .collect(); + Assertions + .assertEquals( + "7.5597134689e-09", measures + .stream() + .filter(mes -> mes.getId().equals("influence")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + + Assertions + .assertEquals( + "4.903880192", measures + .stream() + .filter(mes -> mes.getId().equals("popularity_alt")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + + Assertions + .assertEquals( + "1.17977512835e-08", measures + .stream() + .filter(mes -> mes.getId().equals("popularity")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + + } + @Test + void produceTestSomeNumbers() throws Exception { + + final String doi = "unresolved::10.3390/s18072310::doi"; + JavaRDD tmp = getResultJavaRDD(); + + Assertions.assertEquals(105, tmp.count()); + + Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count()); + + Assertions + .assertEquals( + 19, tmp + .filter(row -> !row.getId().equals(doi)) + .filter(row -> row.getSubject() != null) + .count()); + + Assertions + .assertEquals( + 85, + tmp + .filter(row -> !row.getId().equals(doi)) + .filter(r -> r.getInstance() != null && r.getInstance().size() > 0) + .count()); + + } + + private JavaRDD getResultJavaRDD() throws Exception { final String bipPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json") .getPath(); @@ -103,146 +311,49 @@ public class ProduceTest { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD tmp = sc + return sc .textFile(workingDir.toString() + "/unresolved") .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - - Assertions.assertEquals(105, tmp.count()); - - Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")).count()); - - Assertions - .assertEquals( - 6, tmp - .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) - .collect() - .get(0) - .getSubject() - .size()); - - Assertions - .assertEquals( - 3, tmp - .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) - .collect() - .get(0) - .getInstance() - .get(0) - .getMeasures() - .size()); - - List sbjs = tmp - .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) - .flatMap(row -> row.getSubject().iterator()) - .collect(); - - sbjs.forEach(sbj -> Assertions.assertEquals("FOS", sbj.getQualifier().getClassid())); - sbjs - .forEach( - sbj -> Assertions - .assertEquals( - "Fields of Science and Technology classification", sbj.getQualifier().getClassname())); - sbjs - .forEach( - sbj -> Assertions - .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid())); - sbjs - .forEach( - sbj -> Assertions - .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename())); - - sbjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference())); - sbjs.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred())); - sbjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible())); - sbjs.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust())); - sbjs.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance())); - sbjs - .forEach( - sbj -> Assertions.assertEquals("subject:fos", sbj.getDataInfo().getProvenanceaction().getClassid())); - sbjs - .forEach( - sbj -> Assertions - .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); - sbjs - .forEach( - sbj -> Assertions - .assertEquals( - ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); - sbjs - .forEach( - sbj -> Assertions - .assertEquals( - ModelConstants.DNET_PROVENANCE_ACTIONS, - sbj.getDataInfo().getProvenanceaction().getSchemename())); - - Assertions - .assertEquals( - true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences"))); - Assertions.assertEquals(false, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("nano-technology"))); - Assertions - .assertEquals( - true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology"))); - Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("040502 food science"))); - - Assertions - .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences"))); - Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0303 health sciences"))); - Assertions - .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics"))); - - List measures = tmp - .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) - .flatMap(row -> row.getInstance().iterator()) - .flatMap(inst -> inst.getMeasures().iterator()) - .collect(); - Assertions - .assertEquals( - "7.5597134689e-09", measures - .stream() - .filter(mes -> mes.getId().equals("influence")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); - - Assertions - .assertEquals( - "4.903880192", measures - .stream() - .filter(mes -> mes.getId().equals("popularity_alt")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); - - Assertions - .assertEquals( - "1.17977512835e-08", measures - .stream() - .filter(mes -> mes.getId().equals("popularity")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); - - Assertions - .assertEquals( - 19, tmp - .filter(row -> !row.getId().equals("unresolved::10.3390/s18072310::doi")) - .filter(row -> row.getSubject() != null) - .count()); - - Assertions - .assertEquals( - 85, - tmp - .filter(row -> !row.getId().equals("unresolved::10.3390/s18072310::doi")) - .filter(r -> r.getInstance() != null && r.getInstance().size() > 0) - .count()); - } + + @Test + void prepareTest5Subjects()throws Exception{ + final String doi = "unresolved::10.3390/s18072310::doi"; + JavaRDD tmp = getResultJavaRDD(); + + Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count()); + + Assertions + .assertEquals( + 5, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getSubject() + .size()); + + + + + List sbjs = tmp + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getSubject().iterator()) + .collect(); + + + + Assertions + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("01 natural sciences"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0103 physical sciences"))); + + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010304 chemical physics"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0104 chemical sciences"))); + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010402 general chemistry"))); + + } + } From 10579c0dd0c037476ae938b2bdec4984fc23617c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Dec 2021 23:10:16 +0100 Subject: [PATCH 113/176] [FOS]fixed doi value in test --- .../PrepareFOSSparkJob.java | 40 +-- .../createunresolvedentities/ProduceTest.java | 260 +++++++++--------- 2 files changed, 146 insertions(+), 154 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index d4a02c2ff..c8f472db7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -65,35 +65,35 @@ public class PrepareFOSSparkJob implements Serializable { private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) { Dataset fosDataset = readPath(spark, sourcePath, FOSDataModel.class); - fosDataset.groupByKey((MapFunction)v->v.getDoi(), Encoders.STRING()) - .mapGroups((MapGroupsFunction)(k,it)->{ - Result r = new Result(); - FOSDataModel first = it.next(); - r.setId(DHPUtils.generateUnresolvedIdentifier(first.getDoi(), DOI)); - HashSet level1 = new HashSet<>(); - HashSet level2 = new HashSet<>(); - HashSet level3 = new HashSet<>(); - addLevels(level1, level2, level3, first); - it.forEachRemaining(v -> addLevels(level1, level2, level3, v)); - Listsbjs = new ArrayList<>(); - level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); - level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); - level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); - r.setSubject(sbjs); - return r; - }, Encoders.bean(Result.class)) + fosDataset + .groupByKey((MapFunction) v -> v.getDoi(), Encoders.STRING()) + .mapGroups((MapGroupsFunction) (k, it) -> { + Result r = new Result(); + FOSDataModel first = it.next(); + r.setId(DHPUtils.generateUnresolvedIdentifier(first.getDoi(), DOI)); + HashSet level1 = new HashSet<>(); + HashSet level2 = new HashSet<>(); + HashSet level3 = new HashSet<>(); + addLevels(level1, level2, level3, first); + it.forEachRemaining(v -> addLevels(level1, level2, level3, v)); + List sbjs = new ArrayList<>(); + level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); + level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); + level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); + r.setSubject(sbjs); + return r; + }, Encoders.bean(Result.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath + "/fos"); } - private static void addLevels(HashSet level1, HashSet level2, HashSet level3, FOSDataModel first) { + private static void addLevels(HashSet level1, HashSet level2, HashSet level3, + FOSDataModel first) { level1.add(first.getLevel1()); level2.add(first.getLevel2()); level3.add(first.getLevel3()); } - - } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index b1ffeee17..32fb25640 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -67,29 +67,29 @@ public class ProduceTest { } @Test - void produceTestSubjects()throws Exception{ + void produceTestSubjects() throws Exception { JavaRDD tmp = getResultJavaRDD(); List sbjs = tmp - .filter(row -> row.getSubject()!= null && row.getSubject().size()>0) - .flatMap(row -> row.getSubject().iterator()) - .collect(); + .filter(row -> row.getSubject() != null && row.getSubject().size() > 0) + .flatMap(row -> row.getSubject().iterator()) + .collect(); sbjs.forEach(sbj -> Assertions.assertEquals("FOS", sbj.getQualifier().getClassid())); sbjs - .forEach( - sbj -> Assertions - .assertEquals( - "Fields of Science and Technology classification", sbj.getQualifier().getClassname())); + .forEach( + sbj -> Assertions + .assertEquals( + "Fields of Science and Technology classification", sbj.getQualifier().getClassname())); sbjs - .forEach( - sbj -> Assertions - .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid())); + .forEach( + sbj -> Assertions + .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid())); sbjs - .forEach( - sbj -> Assertions - .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename())); + .forEach( + sbj -> Assertions + .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename())); sbjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference())); sbjs.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred())); @@ -97,38 +97,36 @@ public class ProduceTest { sbjs.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust())); sbjs.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance())); sbjs - .forEach( - sbj -> Assertions.assertEquals("subject:fos", sbj.getDataInfo().getProvenanceaction().getClassid())); + .forEach( + sbj -> Assertions.assertEquals("subject:fos", sbj.getDataInfo().getProvenanceaction().getClassid())); sbjs - .forEach( - sbj -> Assertions - .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); + .forEach( + sbj -> Assertions + .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); sbjs - .forEach( - sbj -> Assertions - .assertEquals( - ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); sbjs - .forEach( - sbj -> Assertions - .assertEquals( - ModelConstants.DNET_PROVENANCE_ACTIONS, - sbj.getDataInfo().getProvenanceaction().getSchemename())); + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, + sbj.getDataInfo().getProvenanceaction().getSchemename())); } @Test - void produceTestMeasuress()throws Exception{ + void produceTestMeasuress() throws Exception { JavaRDD tmp = getResultJavaRDD(); List mes = tmp - .filter(row -> row.getInstance()!= null && row.getInstance().size()>0) - .flatMap(row -> row.getInstance().iterator()) - .flatMap(i->i.getMeasures().iterator()) - .flatMap(m ->m.getUnit().iterator()) - .collect(); - - + .filter(row -> row.getInstance() != null && row.getInstance().size() > 0) + .flatMap(row -> row.getInstance().iterator()) + .flatMap(i -> i.getMeasures().iterator()) + .flatMap(m -> m.getUnit().iterator()) + .collect(); mes.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference())); mes.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred())); @@ -136,119 +134,118 @@ public class ProduceTest { mes.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust())); mes.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance())); mes - .forEach( - sbj -> Assertions.assertEquals("measure:bip", sbj.getDataInfo().getProvenanceaction().getClassid())); + .forEach( + sbj -> Assertions.assertEquals("measure:bip", sbj.getDataInfo().getProvenanceaction().getClassid())); mes - .forEach( - sbj -> Assertions - .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); + .forEach( + sbj -> Assertions + .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); mes - .forEach( - sbj -> Assertions - .assertEquals( - ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); mes - .forEach( - sbj -> Assertions - .assertEquals( - ModelConstants.DNET_PROVENANCE_ACTIONS, - sbj.getDataInfo().getProvenanceaction().getSchemename())); + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, + sbj.getDataInfo().getProvenanceaction().getSchemename())); } + @Test - void produceTest6Subjects() throws Exception{ + void produceTest6Subjects() throws Exception { final String doi = "unresolved::10.3390/s18072310::doi"; JavaRDD tmp = getResultJavaRDD(); Assertions - .assertEquals( - 6, tmp - .filter(row -> row.getId().equals(doi)) - .collect() - .get(0) - .getSubject() - .size()); + .assertEquals( + 6, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getSubject() + .size()); List sbjs = tmp - .filter(row -> row.getId().equals(doi)) - .flatMap(row -> row.getSubject().iterator()) - .collect(); - + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getSubject().iterator()) + .collect(); Assertions - .assertEquals( - true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences"))); + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences"))); Assertions - .assertEquals( - true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology"))); + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology"))); Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("040502 food science"))); Assertions - .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences"))); + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences"))); Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0303 health sciences"))); Assertions - .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics"))); - + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics"))); } @Test - void produceTest3Measures()throws Exception{ + void produceTest3Measures() throws Exception { final String doi = "unresolved::10.3390/s18072310::doi"; JavaRDD tmp = getResultJavaRDD(); Assertions - .assertEquals( - 3, tmp - .filter(row -> row.getId().equals(doi)) - .collect() - .get(0) - .getInstance() - .get(0) - .getMeasures() - .size()); - + .assertEquals( + 3, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getInstance() + .get(0) + .getMeasures() + .size()); List measures = tmp - .filter(row -> row.getId().equals(doi)) - .flatMap(row -> row.getInstance().iterator()) - .flatMap(inst -> inst.getMeasures().iterator()) - .collect(); + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getInstance().iterator()) + .flatMap(inst -> inst.getMeasures().iterator()) + .collect(); Assertions - .assertEquals( - "7.5597134689e-09", measures - .stream() - .filter(mes -> mes.getId().equals("influence")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); + .assertEquals( + "7.5597134689e-09", measures + .stream() + .filter(mes -> mes.getId().equals("influence")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); Assertions - .assertEquals( - "4.903880192", measures - .stream() - .filter(mes -> mes.getId().equals("popularity_alt")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); + .assertEquals( + "4.903880192", measures + .stream() + .filter(mes -> mes.getId().equals("popularity_alt")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); Assertions - .assertEquals( - "1.17977512835e-08", measures - .stream() - .filter(mes -> mes.getId().equals("popularity")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); + .assertEquals( + "1.17977512835e-08", measures + .stream() + .filter(mes -> mes.getId().equals("popularity")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); } + @Test void produceTestSomeNumbers() throws Exception { @@ -316,44 +313,39 @@ public class ProduceTest { .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); } - @Test - void prepareTest5Subjects()throws Exception{ - final String doi = "unresolved::10.3390/s18072310::doi"; + void prepareTest5Subjects() throws Exception { + final String doi = "unresolved::10.1063/5.0032658::doi"; + JavaRDD tmp = getResultJavaRDD(); Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count()); Assertions - .assertEquals( - 5, tmp - .filter(row -> row.getId().equals(doi)) - .collect() - .get(0) - .getSubject() - .size()); - - - + .assertEquals( + 5, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getSubject() + .size()); List sbjs = tmp - .filter(row -> row.getId().equals(doi)) - .flatMap(row -> row.getSubject().iterator()) - .collect(); - - + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getSubject().iterator()) + .collect(); Assertions - .assertEquals( - true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("01 natural sciences"))); + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("01 natural sciences"))); Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0103 physical sciences"))); Assertions - .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010304 chemical physics"))); + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010304 chemical physics"))); Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0104 chemical sciences"))); Assertions - .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010402 general chemistry"))); + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010402 general chemistry"))); } - + } From 31b26d48acbe3e1d60776ca9dff3caa530c15198 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 23 Dec 2021 10:09:47 +0100 Subject: [PATCH 114/176] [Graph Dump] fixed issue on extraction of relation between entities and contexts: the relationship name and type were swapped --- .../dhp/oa/graph/dump/complete/Extractor.java | 6 +-- .../ExtractRelationFromEntityTest.java | 37 +++++++++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java index 3e47ee664..cb4e42ac7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java @@ -89,7 +89,7 @@ public class Extractor implements Serializable { value.getId(), contextId, Constants.RESULT_ENTITY, Constants.CONTEXT_ENTITY, - ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, provenance); + ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP, provenance); if (!hashCodes.contains(r.hashCode())) { relationList .add(r); @@ -99,8 +99,8 @@ public class Extractor implements Serializable { contextId, value.getId(), Constants.CONTEXT_ENTITY, Constants.RESULT_ENTITY, - ModelConstants.RELATIONSHIP, - ModelConstants.IS_RELATED_TO, provenance); + ModelConstants.IS_RELATED_TO, + ModelConstants.RELATIONSHIP, provenance); if (!hashCodes.contains(r.hashCode())) { relationList .add( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java index e43383ef4..511236d82 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java @@ -9,6 +9,7 @@ import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.*; @@ -96,6 +97,42 @@ public class ExtractRelationFromEntityTest { 9, verificationDataset.filter("source.id = '50|dedup_wf_001::15270b996fa8fd2fb5723daxab3685c3'").count()); + Assertions + .assertEquals( + "IsRelatedTo", verificationDataset + .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) + .collectAsList() + .get(0) + .getReltype() + .getName()); + + Assertions + .assertEquals( + "relationship", verificationDataset + .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) + .collectAsList() + .get(0) + .getReltype() + .getType()); + + Assertions + .assertEquals( + "IsRelatedTo", verificationDataset + .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) + .collectAsList() + .get(0) + .getReltype() + .getName()); + + Assertions + .assertEquals( + "relationship", verificationDataset + .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) + .collectAsList() + .get(0) + .getReltype() + .getType()); + } } From 69e9ea9eeb458d3d92b52b39e74e00698f5d5f9e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 23 Dec 2021 10:15:30 +0100 Subject: [PATCH 115/176] [Graph Dump] Test for extraction of rels from entities extended --- .../ExtractRelationFromEntityTest.java | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java index 511236d82..93d520eb4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java @@ -115,6 +115,23 @@ public class ExtractRelationFromEntityTest { .getReltype() .getType()); + Assertions + .assertEquals( + "context", verificationDataset + .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) + .collectAsList() + .get(0) + .getSource() + .getType()); + + Assertions + .assertEquals( + "result", verificationDataset + .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) + .collectAsList() + .get(0) + .getTarget() + .getType()); Assertions .assertEquals( "IsRelatedTo", verificationDataset @@ -133,6 +150,23 @@ public class ExtractRelationFromEntityTest { .getReltype() .getType()); + Assertions + .assertEquals( + "context", verificationDataset + .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) + .collectAsList() + .get(0) + .getTarget() + .getType()); + + Assertions + .assertEquals( + "result", verificationDataset + .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) + .collectAsList() + .get(0) + .getSource() + .getType()); } } From 2a67ee13ecb1f62ca47ccbbc71f4d12651669b22 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 23 Dec 2021 10:37:52 +0100 Subject: [PATCH 116/176] [SDG] added model class --- .../createunresolvedentities/model/SDGDataModel.java | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java new file mode 100644 index 000000000..9b0300bf2 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java @@ -0,0 +1,6 @@ +package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; + +import java.io.Serializable; + +public class SDGDataModel implements Serializable { +} From cccb16900c15690cb1e22995674d7b8abbbc3010 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 23 Dec 2021 12:33:53 +0100 Subject: [PATCH 117/176] https://support.openaire.eu/issues/7330 normalising DOI urls --- .../oa/provision/utils/XmlRecordFactory.java | 17 +++++++++++ .../provision/IndexRecordTransformerTest.java | 29 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index af7c7b7c3..9a3f2188b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -10,6 +10,7 @@ import java.io.IOException; import java.io.Serializable; import java.io.StringReader; import java.io.StringWriter; +import java.net.MalformedURLException; import java.net.URL; import java.util.*; import java.util.stream.Collectors; @@ -22,6 +23,7 @@ import javax.xml.transform.stream.StreamResult; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.apache.http.protocol.HTTP; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -55,6 +57,8 @@ public class XmlRecordFactory implements Serializable { * */ private static final long serialVersionUID = 2912912999272373172L; + public static final String DOI_ORG_AUTHORITY = "doi.org"; + public static final String HTTPS = "https"; private final Map accumulators; @@ -1269,6 +1273,7 @@ public class XmlRecordFactory implements Serializable { .getUrl() .stream() .filter(this::isValidUrl) + .map(XmlRecordFactory::normalizeDoiUrl) .collect(Collectors.toList())); return i; }) @@ -1285,6 +1290,18 @@ public class XmlRecordFactory implements Serializable { .map(this::mergeInstances); } + public static String normalizeDoiUrl(String url) { + if (url.contains(DOI_ORG_AUTHORITY)) { + try { + URL u = new URL(url); + return new URL(HTTPS, DOI_ORG_AUTHORITY, u.getFile()).toString(); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + } + return url; + } + private boolean isValidUrl(String url) { try { new URL(url).toURI(); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index 64935e79d..86a155292 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -1,9 +1,14 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Arrays; +import java.util.List; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; @@ -109,6 +114,30 @@ public class IndexRecordTransformerTest { testRecordTransformation(record); } + @Test + void testDoiUrlNormalization() throws MalformedURLException { + + // TODO add more test examples when needed + List urls = Arrays + .asList( + "https://dx.doi.org/10.1016/j.jas.2019.105013", + "http://dx.doi.org/10.13140/rg.2.2.26964.65927", + "https://dx.doi.org/10.13140/rg.2.2.26964.65927", + "http://dx.doi.org/10.1016/j.jas.2019.105013", + "http://hdl.handle.net/2072/369223", + "https://doi.org/10.1016/j.jas.2019.105013"); + + for (String url : urls) { + URL u = new URL(XmlRecordFactory.normalizeDoiUrl(url)); + if (url.contains(XmlRecordFactory.DOI_ORG_AUTHORITY)) { + assertEquals(XmlRecordFactory.HTTPS, u.getProtocol()); + assertEquals(XmlRecordFactory.DOI_ORG_AUTHORITY, u.getAuthority()); + } else { + assertEquals(url, u.toString()); + } + } + } + private void testRecordTransformation(final String record) throws IOException, TransformerException { final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); From 7a1b4404132b2b2c8aedd2968a8c9eaa908ccedf Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 23 Dec 2021 13:24:28 +0100 Subject: [PATCH 118/176] [SDG] logic to create unresolved entities out of SDG input. This changes also some classes related to FOS to reuse the same code. The code under createunresolvedentities create results with the merged update of the the inputs provided (bip at the level of the isntance, fos and sdg for subjects) --- .../dnetlib/dhp/actionmanager/Constants.java | 10 +- .../{GetFOSData.java => GetInputData.java} | 21 ++- .../PrepareFOSSparkJob.java | 10 +- .../PrepareSDGSparkJob.java | 86 +++++++++ .../model/SDGDataModel.java | 44 ++++- ...ameters.json => get_input_parameters.json} | 0 .../oozie_app/workflow.xml | 39 +++- .../createunresolvedentities/PrepareTest.java | 136 +++++++++++--- .../createunresolvedentities/ProduceTest.java | 172 ++++++++++++++++++ .../createunresolvedentities/sdg/sdg.json | 37 ++++ .../createunresolvedentities/sdg/sdg_sbs.csv | 37 ++++ .../ExtractRelationFromEntityTest.java | 56 +++--- 12 files changed, 581 insertions(+), 67 deletions(-) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/{GetFOSData.java => GetInputData.java} (75%) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java rename dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/{get_fos_parameters.json => get_input_parameters.json} (100%) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg_sbs.csv diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java index 153a98d3f..ef4ef6756 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java @@ -19,14 +19,20 @@ public class Constants { public static final String DOI = "doi"; + public static final char DEFAULT_DELIMITER = ','; + public static final String UPDATE_DATA_INFO_TYPE = "update"; public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos"; public static final String UPDATE_CLASS_NAME = "Inferred by OpenAIRE"; public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip"; + public static final String UPDATE_SUBJECT_SDG_CLASS_ID = "subject:sdg"; public static final String FOS_CLASS_ID = "FOS"; public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification"; + public static final String SDG_CLASS_ID = "SDG"; + public static final String SDG_CLASS_NAME = "Sustainable Development Goals"; + public static final String NULL = "NULL"; public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); @@ -49,7 +55,7 @@ public class Constants { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } - public static StructuredProperty getSubject(String sbj, String classid, String classname) { + public static StructuredProperty getSubject(String sbj, String classid, String classname, String diqualifierclassid) { if (sbj.equals(NULL)) return null; StructuredProperty sp = new StructuredProperty(); @@ -72,7 +78,7 @@ public class Constants { false, OafMapperUtils .qualifier( - UPDATE_SUBJECT_FOS_CLASS_ID, + diqualifierclassid, UPDATE_CLASS_NAME, ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS), diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetInputData.java similarity index 75% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetInputData.java index bd430d940..499e42a8e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetInputData.java @@ -6,6 +6,7 @@ import java.io.InputStreamReader; import java.io.Serializable; import java.util.Objects; import java.util.Optional; +import java.util.zip.GZIPInputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; @@ -15,13 +16,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.collection.GetCSV; -public class GetFOSData implements Serializable { +import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER; + +public class GetInputData implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(GetInputData.class); - private static final Logger log = LoggerFactory.getLogger(GetFOSData.class); - public static final char DEFAULT_DELIMITER = ','; public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -29,9 +31,9 @@ public class GetFOSData implements Serializable { .toString( Objects .requireNonNull( - GetFOSData.class + GetInputData.class .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_fos_parameters.json")))); + "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json")))); parser.parseArgument(args); @@ -60,16 +62,17 @@ public class GetFOSData implements Serializable { FileSystem fileSystem = FileSystem.get(conf); - new GetFOSData().doRewrite(sourcePath, outputPath, classForName, delimiter, fileSystem); + new GetInputData().doRewrite(sourcePath, outputPath, classForName, delimiter, fileSystem); } public void doRewrite(String inputPath, String outputFile, String classForName, char delimiter, FileSystem fs) throws IOException, ClassNotFoundException { + // reads the csv and writes it as its json equivalent - try (InputStreamReader reader = new InputStreamReader(fs.open(new Path(inputPath)))) { - GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter); + try (InputStreamReader reader = new InputStreamReader(new GZIPInputStream(fs.open(new Path(inputPath))))) { + eu.dnetlib.dhp.common.collection.GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter); } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index c8f472db7..fef796515 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -66,20 +66,20 @@ public class PrepareFOSSparkJob implements Serializable { Dataset fosDataset = readPath(spark, sourcePath, FOSDataModel.class); fosDataset - .groupByKey((MapFunction) v -> v.getDoi(), Encoders.STRING()) + .groupByKey((MapFunction) v -> v.getDoi().toLowerCase(), Encoders.STRING()) .mapGroups((MapGroupsFunction) (k, it) -> { Result r = new Result(); FOSDataModel first = it.next(); - r.setId(DHPUtils.generateUnresolvedIdentifier(first.getDoi(), DOI)); + r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI)); HashSet level1 = new HashSet<>(); HashSet level2 = new HashSet<>(); HashSet level3 = new HashSet<>(); addLevels(level1, level2, level3, first); it.forEachRemaining(v -> addLevels(level1, level2, level3, v)); List sbjs = new ArrayList<>(); - level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); - level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); - level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME))); + level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); + level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); + level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); r.setSubject(sbjs); return r; }, Encoders.bean(Result.class)) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java new file mode 100644 index 000000000..b0607ead5 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java @@ -0,0 +1,86 @@ + +package eu.dnetlib.dhp.actionmanager.createunresolvedentities; + +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.utils.DHPUtils; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import static eu.dnetlib.dhp.actionmanager.Constants.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +public class PrepareSDGSparkJob implements Serializable { + private static final Logger log = LoggerFactory.getLogger(PrepareSDGSparkJob.class); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + PrepareSDGSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String sourcePath = parser.get("sourcePath"); + log.info("sourcePath: {}", sourcePath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + doPrepare( + spark, + sourcePath, + + outputPath); + }); + } + + private static void doPrepare(SparkSession spark, String sourcePath, String outputPath) { + Dataset sdgDataset = readPath(spark, sourcePath, SDGDataModel.class); + + + sdgDataset.groupByKey((MapFunction)r -> r.getDoi().toLowerCase(),Encoders.STRING()) + .mapGroups((MapGroupsFunction)(k,it) -> { + Result r = new Result(); + r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI)); + SDGDataModel first = it.next(); + Listsbjs = new ArrayList<>(); + sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID)); + it.forEachRemaining(s -> sbjs.add(getSubject(s.getSbj(),SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID))); + r.setSubject(sbjs); + return r; + },Encoders.bean(Result.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/sdg"); + } + + + + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java index 9b0300bf2..7b1584e3f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java @@ -1,6 +1,48 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; +import com.opencsv.bean.CsvBindByPosition; + import java.io.Serializable; -public class SDGDataModel implements Serializable { +public class SDGDataModel implements Serializable{ + + @CsvBindByPosition(position = 0) +// @CsvBindByName(column = "doi") + private String doi; + + @CsvBindByPosition(position = 1) +// @CsvBindByName(column = "sdg") + private String sbj; + + + public SDGDataModel() { + + } + + public SDGDataModel(String doi, String sbj) { + this.doi = doi; + this.sbj = sbj; + + } + + public static SDGDataModel newInstance(String d, String sbj) { + return new SDGDataModel(d, sbj); + } + + public String getDoi() { + return doi; + } + + public void setDoi(String doi) { + this.doi = doi; + } + + + public String getSbj() { + return sbj; + } + + public void setSbj(String sbj) { + this.sbj = sbj; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_fos_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_fos_parameters.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml index d53504fe6..9451af572 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml @@ -79,6 +79,7 @@ + @@ -107,7 +108,7 @@ - eu.dnetlib.dhp.actionmanager.createunresolvedentities.GetFOSData + eu.dnetlib.dhp.actionmanager.createunresolvedentities.GetInputData --hdfsNameNode${nameNode} --sourcePath${fosPath} --outputPath${workingDir}/input/fos @@ -142,6 +143,42 @@ + + + eu.dnetlib.dhp.actionmanager.createunresolvedentities.GetInputData + --hdfsNameNode${nameNode} + --sourcePath${sdgPath} + --outputPath${workingDir}/input/sdg + --classForNameeu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel + + + + + + + + yarn + cluster + Produces the unresolved from FOS! + eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareSDGSparkJob + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/input/sdg + --outputPath${workingDir}/prepared + + + + + diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index 67a0f3465..17137c39a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -10,6 +10,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.stream.Collectors; +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -18,17 +19,13 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel; -import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.schema.oaf.Result; public class PrepareTest { @@ -159,7 +156,7 @@ public class PrepareTest { .getPath(); final String outputPath = workingDir.toString() + "/fos.json"; - new GetFOSData() + new GetInputData() .doRewrite( sourcePath, outputPath, "eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel", ',', fs); @@ -180,6 +177,8 @@ public class PrepareTest { } + + @Test void fosPrepareTest() throws Exception { final String sourcePath = getClass() @@ -204,15 +203,9 @@ public class PrepareTest { String doi1 = "unresolved::10.3390/s18072310::doi"; - assertEquals(50, tmp.count()); + assertEquals(20, tmp.count()); assertEquals(1, tmp.filter(row -> row.getId().equals(doi1)).count()); - assertTrue( - tmp - .filter(r -> r.getId().equals(doi1)) - .flatMap(r -> r.getSubject().iterator()) - .map(sbj -> sbj.getValue()) - .collect() - .contains("engineering and technology")); + assertTrue( tmp @@ -220,16 +213,16 @@ public class PrepareTest { .flatMap(r -> r.getSubject().iterator()) .map(sbj -> sbj.getValue()) .collect() - .contains("nano-technology")); + .contains("04 agricultural and veterinary sciences")); assertTrue( tmp .filter(r -> r.getId().equals(doi1)) .flatMap(r -> r.getSubject().iterator()) .map(sbj -> sbj.getValue()) .collect() - .contains("nanoscience & nanotechnology")); + .contains("0404 agricultural biotechnology")); - String doi = "unresolved::10.1111/1365-2656.12831::doi"; + String doi = "unresolved::10.1007/s11164-020-04383-6::doi"; assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count()); assertTrue( tmp @@ -237,7 +230,7 @@ public class PrepareTest { .flatMap(r -> r.getSubject().iterator()) .map(sbj -> sbj.getValue()) .collect() - .contains("psychology and cognitive sciences")); + .contains("01 natural sciences")); assertTrue( tmp @@ -245,15 +238,116 @@ public class PrepareTest { .flatMap(r -> r.getSubject().iterator()) .map(sbj -> sbj.getValue()) .collect() - .contains("social sciences")); - assertFalse( + .contains("0104 chemical sciences")); + assertTrue( tmp .filter(r -> r.getId().equals(doi)) .flatMap(r -> r.getSubject().iterator()) .map(sbj -> sbj.getValue()) .collect() - .contains("NULL")); + .contains("010402 general chemistry")); } + @Test + void getSDGFileTest() throws IOException, ClassNotFoundException { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg_sbs.csv") + .getPath(); + final String outputPath = workingDir.toString() + "/sdg.json"; + + new GetInputData() + .doRewrite( + sourcePath, outputPath, "eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel", + ',', fs); + + BufferedReader in = new BufferedReader( + new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath)))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + SDGDataModel sdg = new ObjectMapper().readValue(line, SDGDataModel.class); + + System.out.println(new ObjectMapper().writeValueAsString(sdg)); + count += 1; + } + + assertEquals(37, count); + + } + + @Test + void sdgPrepareTest() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json") + .getPath(); + + PrepareSDGSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, + + "-outputPath", workingDir.toString() + "/work" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/work/sdg") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + String doi1 = "unresolved::10.1001/amaguidesnewsletters.2019.sepoct02::doi"; + + assertEquals(32, tmp.count()); + assertEquals(1, tmp.filter(row -> row.getId().equals(doi1)).count()); + + + assertTrue( + tmp + .filter(r -> r.getId().equals(doi1)) + .flatMap(r -> r.getSubject().iterator()) + .map(sbj -> sbj.getValue()) + .collect() + .contains("3. Good health")); + assertTrue( + tmp + .filter(r -> r.getId().equals(doi1)) + .flatMap(r -> r.getSubject().iterator()) + .map(sbj -> sbj.getValue()) + .collect() + .contains("8. Economic growth")); + + + } + @Disabled + @Test + void test2() throws Exception { + final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_sdg_results_20_12_21.csv.gz"; + + + final String outputPath = workingDir.toString() + "/sdg.json"; + + new GetInputData() + .doRewrite( + sourcePath, outputPath, "eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel", + ',', fs); + + BufferedReader in = new BufferedReader( + new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath)))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + SDGDataModel sdg = new ObjectMapper().readValue(line, SDGDataModel.class); + + System.out.println(new ObjectMapper().writeValueAsString(sdg)); + count += 1; + } + + + } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index 32fb25640..28fce4adb 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -7,6 +7,7 @@ import java.nio.file.Path; import java.util.List; import java.util.stream.Collectors; +import eu.dnetlib.dhp.actionmanager.Constants; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -348,4 +349,175 @@ public class ProduceTest { } + + private JavaRDD getResultJavaRDDPlusSDG() throws Exception { + final String bipPath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json") + .getPath(); + + PrepareBipFinder + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", bipPath, + "--outputPath", workingDir.toString() + "/work" + + }); + final String fosPath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json") + .getPath(); + + PrepareFOSSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", fosPath, + "-outputPath", workingDir.toString() + "/work" + }); + + final String sdgPath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json") + .getPath(); + + PrepareSDGSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sdgPath, + "-outputPath", workingDir.toString() + "/work" + }); + + SparkSaveUnresolved.main(new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", workingDir.toString() + "/work", + + "-outputPath", workingDir.toString() + "/unresolved" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + return sc + .textFile(workingDir.toString() + "/unresolved") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + } + + + @Test + void produceTestSomeNumbersWithSDG() throws Exception { + + final String doi = "unresolved::10.3390/s18072310::doi"; + JavaRDD tmp = getResultJavaRDDPlusSDG(); + + Assertions.assertEquals(136, tmp.count()); + + Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count()); + + Assertions + .assertEquals( + 50, tmp + .filter(row -> !row.getId().equals(doi)) + .filter(row -> row.getSubject() != null) + .count()); + + Assertions + .assertEquals( + 85, + tmp + .filter(row -> !row.getId().equals(doi)) + .filter(r -> r.getInstance() != null && r.getInstance().size() > 0) + .count()); + + } + + @Test + void produceTest7Subjects() throws Exception { + final String doi = "unresolved::10.3390/s18072310::doi"; + + JavaRDD tmp = getResultJavaRDDPlusSDG(); + + Assertions + .assertEquals( + 7, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getSubject() + .size()); + + List sbjs = tmp + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getSubject().iterator()) + .collect(); + + Assertions + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences"))); + + Assertions + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("040502 food science"))); + + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0303 health sciences"))); + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics"))); + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("1. No poverty"))); + + } + + @Test + void produceTestSubjectsWithSDG() throws Exception { + + JavaRDD tmp = getResultJavaRDDPlusSDG(); + + List sbjs_sdg = tmp + .filter(row -> row.getSubject() != null && row.getSubject().size() > 0) + .flatMap(row -> row.getSubject().iterator()) + .filter(sbj -> sbj.getQualifier().getClassid().equals(Constants.SDG_CLASS_ID)) + .collect(); + + sbjs_sdg.forEach(sbj -> Assertions.assertEquals("SDG", sbj.getQualifier().getClassid())); + sbjs_sdg + .forEach( + sbj -> Assertions + .assertEquals( + "Sustainable Development Goals", sbj.getQualifier().getClassname())); + sbjs_sdg + .forEach( + sbj -> Assertions + .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid())); + sbjs_sdg + .forEach( + sbj -> Assertions + .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename())); + + sbjs_sdg.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference())); + sbjs_sdg.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred())); + sbjs_sdg.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible())); + sbjs_sdg.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust())); + sbjs_sdg.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance())); + sbjs_sdg + .forEach( + sbj -> Assertions.assertEquals("subject:sdg", sbj.getDataInfo().getProvenanceaction().getClassid())); + sbjs_sdg + .forEach( + sbj -> Assertions + .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); + sbjs_sdg + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); + sbjs_sdg + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, + sbj.getDataInfo().getProvenanceaction().getSchemename())); + } + } diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json new file mode 100644 index 000000000..59d707177 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json @@ -0,0 +1,37 @@ +{"doi":"10.1001/amaguidesnewsletters.2019.mayjun02","sbj":"10. No inequality"} +{"doi":"10.1001/amaguidesnewsletters.2019.novdec01","sbj":"10. No inequality"} +{"doi":"10.1001/amaguidesnewsletters.2019.sepoct02","sbj":"3. Good health"} +{"doi":"10.1001/amaguidesnewsletters.2019.sepoct02","sbj":"8. Economic growth"} +{"doi":"10.1001/amaguidesnewsletters.2020.janfeb01","sbj":"8. Economic growth"} +{"doi":"10.1001/amaguidesnewsletters.2020.janfeb02","sbj":"3. Good health"} +{"doi":"10.1001/amaguidesnewsletters.2020.janfeb02","sbj":"8. Economic growth"} +{"doi":"10.1001/amaguidesnewsletters.2020.julaug01","sbj":"3. Good health"} +{"doi":"10.1001/amaguidesnewsletters.2020.marapr01","sbj":"3. Good health"} +{"doi":"10.1001/amaguidesnewsletters.2020.mayjun01","sbj":"3. Good health"} +{"doi":"10.1001/amaguidesnewsletters.2020.mayjun02","sbj":"16. Peace & justice"} +{"doi":"10.1001/amaguidesnewsletters.2020.mayjun02","sbj":"10. No inequality"} +{"doi":"10.1001/amaguidesnewsletters.2021.julaug01","sbj":"1. No poverty"} +{"doi":"10.1001/amaguidesnewsletters.2021.mayjune01","sbj":"10. No inequality"} +{"doi":"10.1001/amaguidesnewsletters.2021.mayjune02","sbj":"10. No inequality"} +{"doi":"10.4336/2021.pfb.41e201902078","sbj":"15. Life on land"} +{"doi":"10.4337/ejeep.2019.00045","sbj":"16. Peace & justice"} +{"doi":"10.4337/ejeep.2019.00050","sbj":"1. No poverty"} +{"doi":"10.4337/ejeep.2019.0045","sbj":"16. Peace & justice"} +{"doi":"10.4337/ejeep.2019.0050","sbj":"1. No poverty"} +{"doi":"10.4337/ejeep.2019.0051","sbj":"16. Peace & justice"} +{"doi":"10.4337/ejeep.2019.0052","sbj":"16. Peace & justice"} +{"doi":"10.4337/ejeep.2020.0058","sbj":"1. No poverty"} +{"doi":"10.4337/ejeep.2020.0058","sbj":"10. No inequality"} +{"doi":"10.4337/ejeep.2020.0060","sbj":"10. No inequality"} +{"doi":"10.4337/ejeep.2020.0065","sbj":"16. Peace & justice"} +{"doi":"10.4337/ejeep.2020.02.03","sbj":"16. Peace & justice"} +{"doi":"10.4337/ejeep.2020.02.05","sbj":"8. Economic growth"} +{"doi":"10.4337/ejeep.2020.02.06","sbj":"16. Peace & justice"} +{"doi":"10.4337/ejeep.2020.02.09","sbj":"16. Peace & justice"} +{"doi":"10.4337/roke.2020.01.01","sbj":"16. Peace & justice"} +{"doi":"10.4337/roke.2020.01.03","sbj":"16. Peace & justice"} +{"doi":"10.4337/roke.2020.01.05","sbj":"1. No poverty"} +{"doi":"10.4337/roke.2020.01.05","sbj":"8. Economic growth"} +{"doi":"10.4337/roke.2020.01.07","sbj":"8. Economic growth"} +{"doi":"10.4337/roke.2020.02.03","sbj":"8. Economic growth"} +{"doi":"10.3390/s18072310","sbj":"1. No poverty"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg_sbs.csv b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg_sbs.csv new file mode 100644 index 000000000..30524467e --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg_sbs.csv @@ -0,0 +1,37 @@ +10.1001/amaguidesnewsletters.2019.mayjun02,10. No inequality +10.1001/amaguidesnewsletters.2019.novdec01,10. No inequality +10.1001/amaguidesnewsletters.2019.sepoct02,3. Good health +10.1001/amaguidesnewsletters.2019.sepoct02,8. Economic growth +10.1001/amaguidesnewsletters.2020.janfeb01,8. Economic growth +10.1001/amaguidesnewsletters.2020.janfeb02,3. Good health +10.1001/amaguidesnewsletters.2020.janfeb02,8. Economic growth +10.1001/amaguidesnewsletters.2020.julaug01,3. Good health +10.1001/amaguidesnewsletters.2020.marapr01,3. Good health +10.1001/amaguidesnewsletters.2020.mayjun01,3. Good health +10.1001/amaguidesnewsletters.2020.mayjun02,16. Peace & justice +10.1001/amaguidesnewsletters.2020.mayjun02,10. No inequality +10.1001/amaguidesnewsletters.2021.julaug01,1. No poverty +10.1001/amaguidesnewsletters.2021.mayjune01,10. No inequality +10.1001/amaguidesnewsletters.2021.mayjune02,10. No inequality +10.4336/2021.pfb.41e201902078,15. Life on land +10.4337/ejeep.2019.00045,16. Peace & justice +10.4337/ejeep.2019.00050,1. No poverty +10.4337/ejeep.2019.0045,16. Peace & justice +10.4337/ejeep.2019.0050,1. No poverty +10.4337/ejeep.2019.0051,16. Peace & justice +10.4337/ejeep.2019.0052,16. Peace & justice +10.4337/ejeep.2020.0058,1. No poverty +10.4337/ejeep.2020.0058,10. No inequality +10.4337/ejeep.2020.0060,10. No inequality +10.4337/ejeep.2020.0065,16. Peace & justice +10.4337/ejeep.2020.02.03,16. Peace & justice +10.4337/ejeep.2020.02.05,8. Economic growth +10.4337/ejeep.2020.02.06,16. Peace & justice +10.4337/ejeep.2020.02.09,16. Peace & justice +10.4337/roke.2020.01.01,16. Peace & justice +10.4337/roke.2020.01.03,16. Peace & justice +10.4337/roke.2020.01.05,1. No poverty +10.4337/roke.2020.01.05,8. Economic growth +10.4337/roke.2020.01.07,8. Economic growth +10.4337/roke.2020.02.03,8. Economic growth +10.4337/roke.2020.02.04,1. No poverty \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java index 93d520eb4..27573bb32 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java @@ -116,22 +116,22 @@ public class ExtractRelationFromEntityTest { .getType()); Assertions - .assertEquals( - "context", verificationDataset - .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) - .collectAsList() - .get(0) - .getSource() - .getType()); + .assertEquals( + "context", verificationDataset + .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) + .collectAsList() + .get(0) + .getSource() + .getType()); Assertions - .assertEquals( - "result", verificationDataset - .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) - .collectAsList() - .get(0) - .getTarget() - .getType()); + .assertEquals( + "result", verificationDataset + .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) + .collectAsList() + .get(0) + .getTarget() + .getType()); Assertions .assertEquals( "IsRelatedTo", verificationDataset @@ -151,22 +151,22 @@ public class ExtractRelationFromEntityTest { .getType()); Assertions - .assertEquals( - "context", verificationDataset - .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) - .collectAsList() - .get(0) - .getTarget() - .getType()); + .assertEquals( + "context", verificationDataset + .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) + .collectAsList() + .get(0) + .getTarget() + .getType()); Assertions - .assertEquals( - "result", verificationDataset - .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) - .collectAsList() - .get(0) - .getSource() - .getType()); + .assertEquals( + "result", verificationDataset + .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) + .collectAsList() + .get(0) + .getSource() + .getType()); } } From 7551e52e950085d8b67f4c789916e452a8c2848c Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Thu, 23 Dec 2021 15:33:53 +0200 Subject: [PATCH 119/176] fixed a typo --- .../graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index d142f5d68..2d6ff41d4 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -52,8 +52,7 @@ compute stats TARGET.result_languages; create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_licenses; -create table TARGET.licenses_normalized stored as parquet as select * from SOURCE.licenses_normalized orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.licenses_normalized; +create table TARGET.licenses_normalized as select * from SOURCE.licenses_normalized; create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_oids; From 81ee654271901ab9ba7a3b4241e5535adf9c6b19 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Thu, 23 Dec 2021 15:46:17 +0200 Subject: [PATCH 120/176] added result_result relations --- .../oa/graph/stats/oozie_app/scripts/step13.sql | 15 ++++++++++++++- .../oozie_app/scripts/step20-createMonitorDB.sql | 3 +++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql index e4e81175c..315d296fc 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -67,4 +67,17 @@ from ( LATERAL VIEW explode(author) a as auth LATERAL VIEW explode(auth.pid) ap as auth_pid LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type - WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res \ No newline at end of file + WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res; + +create table ${stats_db_name}.result_result stored as parquet as +select substr(rel.source, 4) as source, substr(rel.target, 4) as target, relclass, subreltype +from ${openaire_db_name}.relation rel +join ${openaire_db_name}.result r1 on rel.source=r1.id +join ${openaire_db_name}.result r2 on r2.id=rel.target +where reltype='resultResult' + and r1.resulttype.classname!=r2.resulttype.classname + and r1.datainfo.deletedbyinference=false + and r2.datainfo.deletedbyinference=false + and r1.resulttype.classname != 'other' + and r2.resulttype.classname != 'other' + and rel.datainfo.deletedbyinference=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 2d6ff41d4..e72378f56 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -81,6 +81,9 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; +create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source or r.id=orig.target); +compute stats TARGET.result_result; + -- datasources create view if not exists TARGET.datasource as select * from SOURCE.datasource; create view if not exists TARGET.datasource_oids as select * from SOURCE.datasource_oids; From bcea4e3a9bbeff8ceec8bfab2fee90011c5dc84c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 3 Jan 2022 14:33:00 +0100 Subject: [PATCH 121/176] added dnet workflow profile for the orchestration of the simplified and complete graph construction and processing pipeline, where the IIS works on the non-deduplicated graph --- .../00_beta_graph_complete_experiment.xml | 1305 +++++++++++++++++ 1 file changed, 1305 insertions(+) create mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml new file mode 100644 index 000000000..c10dd4e99 --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml @@ -0,0 +1,1305 @@ + +
+ + + + + +
+ + Graph processing [EXPERIMENT] + Data Provision + 30 + + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_BETA + + + + + + + set blacklist of funder nsPrefixes from the beta aggregator + + nsPrefixBlacklist_BETA + gsrt________,rcuk________ + + + + + + + set blacklist of funder nsPrefixes from the production aggregator + + nsPrefixBlacklist_PROD + gsrt________,rcuk________,fct_________,nwo_________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the target path to store the MERGED graph + + mergedGraphPath + /tmp/beta_experiment/graph/01_graph_merged + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/beta_experiment/graph/02_graph_raw + + + + + + + Set the target path to store the the consistent graph cleaned + + cleanedFirstGraphPath + /tmp/beta_experiment/graph/03_graph_cleaned + + + + + + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_experiment/graph/04_graph_resolved + + + + + + + Set the target path to store the INFERRED graph + + inferredGraphPath + /tmp/beta_experiment/graph/05_graph_inferred + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/beta_experiment/graph/06_graph_dedup + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/beta_experiment/graph/07_graph_consistent + + + + + + + Set the target path to store the ORCID enriched graph + + orcidGraphPath + /tmp/beta_experiment/graph/08_graph_orcid + + + + + + + Set the target path to store the BULK TAGGED graph + + bulkTaggingGraphPath + /tmp/beta_experiment/graph/09_graph_bulktagging + + + + + + + Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph + + affiliationGraphPath + /tmp/beta_experiment/graph/10_graph_affiliation + + + + + + + Set the target path to store the AFFILIATION from SEMATIC RELATION graph + + affiliationSemRelGraphPath + /tmp/beta_experiment/graph/11_graph_affiliationsr + + + + + + + Set the target path to store the COMMUNITY from SELECTED SOURCES graph + + communityOrganizationGraphPath + /tmp/beta_experiment/graph/12_graph_community_organization + + + + + + + Set the target path to store the FUNDING from SEMANTIC RELATION graph + + fundingGraphPath + /tmp/beta_experiment/graph/13_graph_funding + + + + + + + Set the target path to store the COMMUNITY from SEMANTIC RELATION graph + + communitySemRelGraphPath + /tmp/beta_experiment/graph/14_graph_community_sem_rel + + + + + + + Set the target path to store the COUNTRY enriched graph + + countryGraphPath + /tmp/beta_experiment/graph/15_graph_country + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/beta_experiment/graph/16_graph_cleaned + + + + + + + Set the target path to store the blacklisted graph + + blacklistedGraphPath + /tmp/beta_experiment/graph/17_graph_blacklisted + + + + + + + Set the map of paths for the Bulk Tagging + + bulkTaggingPathMap + {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} + + + + + + + Set the map of associations organization, community list for the propagation of community to result through organization + + propagationOrganizationCommunityMap + {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], + "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} + + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + doiboost,orcidworks-no-doi,opencitations,h2020classification + + + + + + + declares the ActionSet ids to promote in the INFERRED graph + + actionSetIdsIISGraph + iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,iis-entities-software,iis-entities-patent + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims_PROD + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs_PROD + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims_PROD + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs_PROD + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB_PROD + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs_PROD + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF_PROD + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF_PROD + true + + + + + + + should apply the relations id patching based on the provided idMapping on PROD? + + shouldPatchRelations_PROD + false + + + + + + + set the PROD aggregator content path + + prodContentPath + /tmp/prod_aggregator_for_beta + + + + + + + Set the path containing the PROD AGGREGATOR graph + + prodAggregatorGraphPath + /tmp/beta_experiment/graph/00_prod_graph_aggregator + + + + + + + reuse cached ODF claims from the BETA aggregation system + + reuseODFClaims_BETA + true + + + + + + + reuse cached ODF records on HDFS from the BETA aggregation system + + reuseODFhdfs_BETA + true + + + + + + + reuse cached OAF claims from the BETA aggregation system + + reuseOAFClaims_BETA + true + + + + + + + reuse cached OAF records on HDFS from the BETA aggregation system + + reuseOAFhdfs_BETA + true + + + + + + + reuse cached DB content from the BETA aggregation system + + reuseDB_BETA + true + + + + + + + reuse cached OpenOrgs content from the BETA aggregation system + + reuseDBOpenorgs_BETA + true + + + + + + + reuse cached ODF content from the BETA aggregation system + + reuseODF_BETA + true + + + + + + + reuse cached OAF content from the BETA aggregation system + + reuseOAF_BETA + true + + + + + + + should apply the relations id patching based on the provided idMapping on BETA? + + shouldPatchRelations_BETA + true + + + + + + + set the BETA aggregator content path + + betaContentPath + /tmp/beta_aggregator + + + + + + + Set the path containing the BETA AGGREGATOR graph + + betaAggregatorGraphPath + /tmp/beta_experiment/graph/00_beta_graph_aggregator + + + + + + + wait configurations + + + + + + + + create the BETA AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'betaAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_BETA', + 'reuseOAFClaims' : 'reuseOAFClaims_BETA', + 'reuseDB' : 'reuseDB_BETA', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA', + 'reuseODF' : 'reuseODF_BETA', + 'reuseODF_hdfs' : 'reuseODFhdfs_BETA', + 'reuseOAF' : 'reuseOAF_BETA', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', + 'contentPath' : 'betaContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_BETA', + 'shouldPatchRelations' : 'shouldPatchRelations_BETA', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : 'mongodb://beta.services.openaire.eu', + 'mongoDb' : 'mdstore', + 'mdstoreManagerUrl' : 'https://beta.services.openaire.eu/mdstoremanager', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_experiment/working_dir/beta_aggregator' + } + + build-report + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'prodAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_PROD', + 'reuseOAFClaims' : 'reuseOAFClaims_PROD', + 'reuseDB' : 'reuseDB_PROD', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD', + 'reuseODF' : 'reuseODF_PROD', + 'reuseODF_hdfs' : 'reuseODFhdfs_PROD', + 'reuseOAF' : 'reuseOAF_PROD', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', + 'contentPath' : 'prodContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_PROD', + 'shouldPatchRelations' : 'shouldPatchRelations_PROD', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : 'mongodb://services.openaire.eu', + 'mongoDb' : 'mdstore', + 'mdstoreManagerUrl' : 'https://services.openaire.eu/mdstoremanager', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_experiment/working_dir/prod_aggregator' + } + + build-report + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'betaInputGraphPath' : 'betaAggregatorGraphPath', + 'prodInputGraphPath' : 'prodAggregatorGraphPath', + 'graphOutputPath' : 'mergedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/merge_graph', + 'priority' : 'BETA' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'mergedGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_experiment/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/clean' + } + + build-report + + + + + + + updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedFirstGraphPath' + } + + + { + 'resumeFrom' : 'produceHBM', + 'hostedByMapPath' : '/user/dnet.beta/data/hostedByMap', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/hostedbymap/oozie_app', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G', + 'workingDir' : '/tmp/beta_experiment/working_dir/hostedbymap', + 'outputPath' : '/tmp/beta_experiment/working_dir/hostedbymap' + } + + build-report + + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/relation_resolution', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G' + } + + + + + + + + prepare IIS action sets + + + [ + { + 'set' : 'iis-document-affiliation', + 'jobProperty' : 'export_action_set_id_matched_doc_organizations', + 'enablingProperty' : 'active_document_affiliation', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-projects-main', + 'jobProperty' : 'export_action_set_id_document_referencedProjects', + 'enablingProperty' : 'active_referenceextraction_project', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-datasets-main', + 'jobProperty' : 'export_action_set_id_document_referencedDatasets', + 'enablingProperty' : 'active_referenceextraction_dataset', + 'enabled' : 'true' + }, + { + 'set' : 'iis-researchinitiative', + 'jobProperty' : 'export_action_set_id_document_research_initiative', + 'enablingProperty' : 'active_referenceextraction_researchinitiative', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-similarities', + 'jobProperty' : 'export_action_set_id_document_similarities_standard', + 'enablingProperty' : 'active_documentssimilarity', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-classes', + 'jobProperty' : 'export_action_set_id_document_classes', + 'enablingProperty' : 'active_documentsclassification', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-citations', + 'jobProperty' : 'export_action_set_id_document_referencedDocuments', + 'enablingProperty' : 'active_citationmatching', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-citations-relations', + 'jobProperty' : 'export_action_set_id_citation_relations', + 'enablingProperty' : 'active_citationmatching_relations', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenceextraction-pdb', + 'jobProperty' : 'export_action_set_id_document_pdb', + 'enablingProperty' : 'active_referenceextraction_pdb', + 'enabled' : 'true' + }, + { + 'set' : 'document_software_url', + 'jobProperty' : 'export_action_set_id_document_software_url', + 'enablingProperty' : 'active_referenceextraction_software_url', + 'enabled' : 'true' + }, + { + 'set' : 'iis-entities-software', + 'jobProperty' : 'export_action_set_id_entity_software', + 'enablingProperty' : 'active_referenceextraction_software_url', + 'enabled' : 'true' + }, + { + 'set' : 'iis-communities', + 'jobProperty' : 'export_action_set_id_document_community', + 'enablingProperty' : 'active_referenceextraction_community', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-patents', + 'jobProperty' : 'export_action_set_id_document_patent', + 'enablingProperty' : 'active_referenceextraction_patent', + 'enabled' : 'true' + }, + { + 'set' : 'iis-entities-patent', + 'jobProperty' : 'export_action_set_id_entity_patent', + 'enablingProperty' : 'active_referenceextraction_patent', + 'enabled' : 'true' + }, + { + 'set' : 'iis-covid-19', + 'jobProperty' : 'export_action_set_id_document_covid19', + 'enablingProperty' : 'active_referenceextraction_covid19', + 'enabled' : 'true' + } + ] + + + + + + + + prepare IIS parameters + + import_islookup_service_location + import_content_objectstores_csv + import_content_object_store_location + import_mdstore_service_location + import_dataset_mdstore_ids_csv + oozie.wf.application.path + /lib/iis/primary/snapshots/2021-09-24 + IIS + /tmp/beta_inference/graph/07_graph_cleaned + import_infospace_graph_location + + import_project_concepts_context_ids_csv + aginfra,beopen,citizen-science,clarin,covid-19,dariah,dh-ch,oa-pg,egi,elixir-gr,embrc,enermaps,epos,euromarine,fet-fp7,fet-h2020,fam,galaxy,gotriple,ifremer,inspired-ris,instruct,mes,neanias-underwater,ni,rda,science-innovation-policy,risis,rural-digital-europe,sdsn-gr,sobigdata + + + + + + + IIS main + + iisMainJobV3 + + { + 'cluster' : 'cluster', + 'oozie.wf.application.path' : 'oozie.wf.application.path', + + 'active_document_affiliation' : 'active_document_affiliation', + 'active_referenceextraction_project' : 'active_referenceextraction_project', + 'active_referenceextraction_dataset' : 'active_referenceextraction_dataset', + 'active_referenceextraction_researchinitiative' : 'active_referenceextraction_researchinitiative', + 'active_documentsclassification' : 'active_documentsclassification', + 'active_documentssimilarity' : 'active_documentssimilarity', + 'active_citationmatching' : 'active_citationmatching', + 'active_citationmatching_relations' : 'active_citationmatching_relations', + 'active_referenceextraction_pdb' : 'active_referenceextraction_pdb', + 'active_referenceextraction_software_url' : 'active_referenceextraction_software_url', + 'active_referenceextraction_community' : 'active_referenceextraction_community', + 'active_referenceextraction_patent' : 'active_referenceextraction_patent', + 'active_referenceextraction_covid19' : 'active_referenceextraction_covid19', + + 'import_content_objectstores_csv' : 'import_content_objectstores_csv', + 'import_content_object_store_location' : 'import_content_object_store_location', + 'import_mdstore_service_location' : 'import_mdstore_service_location', + 'import_islookup_service_location' : 'import_islookup_service_location', + 'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv', + 'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv', + 'import_infospace_graph_location' : 'import_infospace_graph_location', + + 'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations', + 'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets', + 'export_action_set_id_document_referencedProjects' : 'export_action_set_id_document_referencedProjects', + 'export_action_set_id_document_research_initiative' : 'export_action_set_id_document_research_initiative', + 'export_action_set_id_document_similarities_standard' : 'export_action_set_id_document_similarities_standard', + + 'export_action_set_id_document_referencedDocuments' : 'export_action_set_id_document_referencedDocuments', + 'export_action_set_id_citation_relations' : 'export_action_set_id_citation_relations', + 'export_action_set_id_document_pdb' : 'export_action_set_id_document_pdb', + 'export_action_set_id_document_software_url' : 'export_action_set_id_document_software_url', + 'export_action_set_id_entity_software' : 'export_action_set_id_entity_software', + 'export_action_set_id_document_community' : 'export_action_set_id_document_community', + 'export_action_set_id_document_patent' : 'export_action_set_id_document_patent', + 'export_action_set_id_entity_patent' : 'export_action_set_id_entity_patent', + 'export_action_set_id_document_covid19' : 'export_action_set_id_document_covid19', + 'export_action_set_id_document_classes' : 'export_action_set_id_document_classes' + } + + + { + 'import_mdstore_service_location' : 'http://services.openaire.eu:8280/is/services/mdStore', + 'import_content_object_store_location' : 'http://services.openaire.eu:8280/is/services/objectStore', + 'import_islookup_service_location' : 'http://services.openaire.eu:8280/is/services/isLookUp', + 'import_content_objectstores_csv': '258755af-0b48-41ee-9652-939c5bd2fca3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2ad5f567-386d-4812-8edb-c0922eacd107_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f3b32c75-9077-4788-83ee-c8451215043c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7e780872-eff7-4a03-b5a2-13ad69c01366_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0a0fcd82-ae5c-4a10-af47-5106e881b639_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c16030ef-917e-462a-8414-2a8e2fac7619_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8f3fdd6e-a2cd-49ad-ba93-73c8184190bf_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,909b5ec2-6859-4acf-a86e-22e31933392b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cea77c2a-feba-4c02-ac3c-d51a159ec904_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2db71b6a-4838-4c25-9883-d4689148bb2a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8d7df838-62c9-4adc-8cd3-f7e84f732ff4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f90b105b-d12e-4061-99d3-0fdb85d10258_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3e74e2c5-4844-40a4-b85f-b3ef920612b9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ddd7b3cc-01c2-4512-9550-52b2446f7dfa_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2a09fb42-38f0-4a7a-a95d-97d81f716e77_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,83b7328f-2ab7-44eb-b213-2d770c4f074e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,99d7d7f0-4f76-4cb5-9b22-3ee8ec28b9c9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4a484614-fd61-4ca5-b520-610335065fa2_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,32a76449-27eb-4b5f-b0e9-11cd9af0e035_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9d22aa91-83c6-44d4-b614-943d855734a5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,71ddf676-f6a4-48c9-9d1d-4ca6742e7316_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d1f4546f-e47c-47c7-b19c-6334633b5f7e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,03ebfd29-88a6-4757-9d05-64ff5018b91a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ba437ca2-8071-4835-9615-a14eb1fb9ff8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3cf2373a-8228-4781-b0f2-9b4343487fc9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7818e488-0ba4-4558-8f76-83cacf2f7358_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fcfcbfd8-4e7a-46a8-a5ff-281c5fa11767_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7b988985-c798-44f1-bff1-5574d2abe8af_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,24d97463-144b-4bb5-b6b6-7f1f5b014833_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5b84e8f6-3ab9-40ad-be6f-7b60b24b9e42_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a8c1d0ec-9f3e-42ba-8748-f9b5e62eb7a1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3cd6626e-8e0c-49ae-89a7-5e1286bddf8e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,097344eb-7312-4096-ab9d-204c6fd8395e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,42e09e1f-352b-4ba4-98e9-0826635009d9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,792e67b2-29ed-40ac-a406-61726f5921ab_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f78c1ada-b747-4862-a68b-45d61613199f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5707d04f-96b0-4e59-8607-5d095a3bd301_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8fce096c-c0d8-410d-9f07-a93ac87cf2a6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2f817fb6-3b23-474a-b137-0c4739c264b6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3a5fb37f-8d49-4432-a294-538af185ac88_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0b17e450-8f41-4edc-8d08-68f644a41c3e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cedeffa5-70ac-4415-9640-fcc997f36aa9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ce93c295-ca6d-48e7-9a28-eaf26cf57cb3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,62e02764-705a-471d-8057-359b0ed3c904_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ac5b5231-e600-43ad-8f91-16a91695d81f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,27c2d995-e379-422a-9e4c-12999d6c9846_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,990226a9-28f8-49da-be60-806e1a09addc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5e940a8b-88df-496e-a856-aa0a949effe4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,73979dd9-ccc7-495f-87ab-be0a6be4fcf0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b8eee4a1-1163-4b02-86b9-13443da6dc14_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6af002f3-cb24-4086-b912-56583e4e9bd5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,75f3089e-1cfd-40f0-bdfd-88c80e5d53f2_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d794e42c-93fc-47ce-8fe8-e5e40220f770_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3bb435b7-2cec-43f1-b19e-89656d68b1b5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,82563c20-9d34-4b42-8b18-af629d483133_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,30a18588-80c2-4c10-ac6a-7823a47bbea4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,74373d10-aa93-4a81-b4a7-928801073f3d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6002b6ef-1cb3-4277-8e6b-50ca1c6b2bc0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,bc097067-e9c6-44ef-9251-1dc1a9f4dd0a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,70d8e385-b668-43ac-9b08-a0e2aa6b0b3b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f3550954-6dd4-4f70-b2b2-fa332f9cf0f9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c3fd0f58-164c-420e-a0c1-45cd5de7dc8f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f8df0721-fbec-4d2c-be7f-9570406adcac_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2a51aa1c-16b2-43e4-bb84-ee353ddf77df_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f68c874d-dd8e-4006-ac73-094f6179d0ca_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,81f561d7-1008-4e85-8f49-3c862cc5257d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7f30c022-1bb8-4752-81cb-2ce948353930_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,178a2b94-08e6-44c3-b8ba-e69329681e2b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3b37b1bc-64d5-4d41-b9e2-63234421d098_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a88973fd-62ce-4284-a56f-c5197123d8f9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e9f27bb4-0180-483b-903f-44358856aec7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,75b967d0-92a9-4a02-96e6-2ccfd39f5305_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ca01e02c-630c-4e69-b9c1-28e3196dd383_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,212aec8e-a1c0-4e82-b46c-5dce0feaa560_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,75b66f48-57f6-4920-9493-b667be192659_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fcd93176-ec23-4676-9149-3fe525435943_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5c75e6ff-31b0-41f7-b73c-bee9adf3fa60_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3ed071cf-f00f-4d9d-a0ba-8b50eaebeb2c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ea1c9a3d-369c-4658-b003-51d864c26659_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b203ea92-1a60-41c9-98bd-6f9f6d5a5f03_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,df08df2b-e184-4b58-8c7b-13ee24caf292_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1b7cee3d-4eb7-48d2-bd92-8166bc6bd61f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,521bc388-5d9d-43ae-9650-15767fae368e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,192bf304-42b0-4f77-93da-ecc2cb2e9c7a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2ae6ac56-a0dc-4014-ad35-fa6b060458df_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,217a415a-3b7c-4aa2-a501-8f68c8fa094a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7387fc00-5cbb-417d-a334-f6ae17c1d0d7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b558b01d-e6b8-47fd-92bc-0a11d6e0ae19_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d5cd3825-141d-4c8e-bb6f-a507d81532dc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,36ff4b61-e1f1-4aa9-8149-3cc3f4259308_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,34fdb421-8142-42ee-96b1-1a3ed8850561_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b102bbb2-5197-4773-bb85-adb5f019d53e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,50789366-2c51-4caf-a0ea-1ae9466aad00_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3053ae8a-1a6b-4e4a-966f-968cdd982830_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9c86d5e7-14f6-42f4-9e00-860f235e5572_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7e04231c-9f98-4fcd-9659-865c692138f8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,343a909a-21f4-4daa-9e1a-761c2e7ee2b1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,389567f2-c640-40ce-a14d-089557398f26_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1f08eaaa-f898-4c2c-a828-f0f7ce458835_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6802f369-db89-4528-b5ec-f163b29ad804_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,45767cd1-d258-44c8-9498-48b63c836ea0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7b135c9a-7516-4095-bca0-6cd09029ee25_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8c2c2853-b126-45b4-b7c4-27a53490ec1d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,88e4d72b-81a6-4b4d-93a7-57dbe6315fc0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cb8e86be-c771-4060-a7c0-87ea34056bc7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,39513d48-6f56-44a5-9df7-252897877baa_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,80d6cbdd-2068-46a7-aae4-5b7978f34840_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,172957e8-5e2b-4ef7-be9f-d53159580fbc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,23638864-2e5e-448d-bd43-ddc78ed00129_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,268c97ff-4374-4720-bee6-8ac03f934093_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,05c9ad76-ac3d-418e-88fb-0ebd28b865c7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3cab6d9f-12d4-427e-baa4-992ff1e89d96_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,85624194-48c2-4f78-9dac-bd97afcdac97_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,333f85b4-4334-4eee-a137-41a889033f46_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,00299a55-13ef-4aca-a759-22982c0cae59_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9dce6dcf-dbea-4d9c-a3cf-d0c0c732d863_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f9a0d2cb-2657-4370-bbcb-0c7634a8d462_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5193e9c6-766b-456f-a5f3-6fedbdd2bbad_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,06f8b1d5-7e21-4ce2-abce-3baa5ac840be_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c5b6d4a2-892e-4ceb-a39c-ceb841739d9e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,10ecbc8b-ea58-4133-9189-896f732114f4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,881431d5-50c0-4983-bedf-09ba7b80543d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f3bc9690-d4ac-4c28-9356-bfbafea2b9f6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a3e22dcd-214b-4204-9de4-9d19e16e9dbf_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a065ba53-325a-4f0e-ac1c-1322b5f2b52f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,31ffcce8-2ba5-4f60-bf59-48f008d82ef3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2f440571-f9e9-4599-b952-446f7ab24682_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ec705b5f-f361-4a6f-8817-ef8c027c3c19_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fccd2c7c-a889-4030-b827-7bda953b90e6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b0b1a376-7c0d-438e-b1b3-86660668e11b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,acb7a4f3-abf5-4cc7-a1fc-2d27882c2e06_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,823addf2-fcc1-4007-96e8-d11806c1cffe_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0ce49a90-d900-4be5-bec6-2c64c4131817_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,79126722-17e9-4e3c-b726-cd2d010c97b9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3789780d-5fa3-4480-95b3-1a83fc0af151_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,879bbbfc-2d87-479c-a81d-8750954f0445_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,07e10331-ed9f-4173-baeb-510381815081_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1bfc3890-b54a-4072-90e6-93ba1a2da3ad_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1c85cbda-5e2e-4db9-ba93-408b84c90bd8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f9c06301-29c4-4559-8bf9-220173597a7a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e0aec0ac-d954-4bae-827f-e8908c5fbd4b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,91dde45b-158c-4e81-b7e1-108dc68dcf55_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,877113b2-affd-4a70-b67b-fe9b555399ff_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,16887dca-84c7-49f6-a352-5cb9a80e1e24_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b10c7251-0b77-4a9e-8ffd-4af48f66b6ee_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2563b673-7db2-476c-ab72-1538aebbc528_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d29af09f-6d1d-4afb-9409-d2b001bfeee1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,276c2099-31b2-4413-ad7f-efb8ae3b7f0f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6e2e600f-614c-4405-b93d-6206630a5771_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2ab5b738-5a98-4dc1-a69e-bf42270defb4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ce6b6847-01eb-407f-bcff-e215702be3cd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c1e0ee7c-58ce-4430-8ffe-dc80c5c4ba8c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,af0edc23-2ba1-4fb9-a3b4-9349d2ddb09a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e416ca69-df34-424e-8589-4c2c6973dcbd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,98f665d7-662f-4a87-8b44-cb0bcc40ceb3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,67550112-8af0-4b99-ad43-1c484811b986_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,00846284-de91-42a6-9bbc-0cb5c574ea44_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6e3b2a99-a96f-4cbb-af06-36f6402ff8dd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,01919ec6-2a78-48b0-8ad0-e68f395d28ff_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,02297d7f-96c1-4191-89d6-fc5a01754c66_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6df51c9f-618e-433e-8db9-ccb302f8985f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c4e0a3a4-9c00-4625-bb0d-595d217d9159_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d7fac7fc-01b4-4f1f-a751-cf108bbe8662_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4153debc-f82a-4fc6-814a-4b47d8252eec_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,96990cb0-046f-47df-bd03-af19ed27f8c5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,60c83aed-9dc7-4f15-8cc5-25cdfcb67078_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,92f9e507-a0cb-4e40-93e8-73af84ae9d61_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c97e4d00-7c5f-49ab-b562-4e136e07de56_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c2457314-928e-4919-b82b-922d8449f3ef_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7ddd9cfe-ddcf-40ac-abfb-90f47f8f40b8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,99535b31-190b-474a-8597-1b2b89e7509a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a8ddcc4d-3b31-40ed-8126-781698bcdfd8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7735efdd-9bd4-428b-964f-e1ebabc30641_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9e5c6bc9-8e0f-491c-9f61-81a180dd7ee9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,40519d2f-4bcd-40ac-88af-106be7bc3955_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,dad827f3-5e0c-4ab2-b12b-178dceec35d5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,dffd4e39-ace7-4640-9b32-51d41c42ee35_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8bc564b6-c341-4245-b409-01b17bf9b0c6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cfcbca42-1a98-407d-aea2-f4c6d412b634_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6d277175-f0e9-4838-a4c0-17b6067ec061_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,245a0fc3-d074-4c7f-9351-ee61aa35ddb1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,66883fbd-e658-4b39-ab3d-8d08e27789e1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8fa62afa-53f5-4d1d-b05f-773c62fc7b87_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d45be243-514a-473f-9648-ef696e41b6d5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6a9b4f86-cdd1-4fa2-ae0f-71a8bd22661d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fb62b38b-25b6-404e-8636-956de83c5394_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f05821b6-b895-47df-bbbc-a0412abefa32_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,55c0f5c6-d4e8-47b0-a093-0ed389da1d33_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e4d01f17-5209-4bed-8e49-bdc0a39985d6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9f562259-c706-4c32-beb5-deb7a5aa449b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9704d935-077e-4235-ac73-75b1eb7662a1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,392c57e8-c9ff-4fb1-84b4-6e97d586b229_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4915b64b-cbbe-4b67-a4f1-7e35cb30ad8a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0db9404c-0222-42aa-bd84-aeac28e6a50c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,579a8c3a-d5de-4ac9-97c0-1e8decf4f781_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,76fb60c2-9e3b-4506-8011-bb14229ec67e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,19afa842-8f2e-4bb1-860d-f39eccd96b18_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c80b36d7-0945-4a75-a299-32c83893d19b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f89add62-eb50-401b-90a8-9441f105ca17_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,214a7942-8ff7-43c2-91be-32e6ebb67488_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f476dd49-671a-44bd-9b57-d3f6b3b15d1f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b928b5aa-3e69-43db-805f-1250abaf0456_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0271e5a0-b086-4f92-b0c8-89d7066add6b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,90e71a3b-9ea9-47be-bbf9-9a9df6b4747f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9b6b02f9-ca49-4896-bebf-d263297ab230_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6301df3f-2da8-4f16-b01c-be02340d9713_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,df85bb2c-7c83-4747-bfae-b1fd20dbc001_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0765fc2e-65e1-4510-bebe-7c9276221340_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7a39bcf1-c5e8-4eea-aadb-a2b4c43a159c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e6478ddc-5290-492d-a571-6a2e621a06c6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f0211b1c-9a1c-40f2-85e8-5e22b1d29dc9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9517683c-c63e-4914-88e8-70127e57fab8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7a705f7c-9252-4724-a3de-ead9413e68f0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,abc008fb-4648-4b92-9213-559f609e9e8c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e82a5c42-798c-4314-8d07-e51b975f9167_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c8309a3f-c374-4974-96cc-3bab9107cf36_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7ab041de-fac8-4914-add0-cbd1c5fabbdd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4a7ff1b1-43a3-4204-a026-3d7ac5aa3935_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,44fcd8ab-1b6b-4642-b1ba-f0cb9d5e1896_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,947d5bd8-2a5b-497f-bfff-26df1ca121cc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,14d784ac-b5ee-48c2-a360-4a4d9ec3acb9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,afd138ca-c90d-4220-82fc-2baf12364bdc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,213aad7f-e540-4eb6-b262-9c30cd83e848_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3b6082c5-2489-4209-ab3a-9bd8ea9be071_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2c111602-4bf5-49c3-8ad1-cc0db22242e1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,bab223d1-1845-4a12-b345-8a26765c4213_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,97fa0e7e-f45b-4338-b983-726eaa7c364b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,55703b4b-7a6e-4088-ac81-5d43b6d19b33_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,03b240d3-fb71-4474-adab-ab48e7e2cc15_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a9279aea-743e-4890-b520-4f430d04716c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9528eee4-d154-4bf0-adc3-e7412d543da0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,58f3d18e-ef7c-4b21-9816-36b3f6beba55_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cfa16256-66b8-47cf-9279-b733da0f79a0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3e2ea667-9887-414b-96f6-979eb76b35b4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,59afef00-c880-4e9b-88ca-8c2379b117ae_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,09685143-e732-4ce5-bcb5-4f4917f0200c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,bb924eae-dc4d-474b-947b-3476acef5f27_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3e1fe865-eaf7-4d09-8817-e53b2727917b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f98ee989-651a-4e7c-b092-9bcfb95529e0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,66f1c2b1-15aa-4edb-ab32-f49ce982b306_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6ba83161-8935-4c33-9dd5-4fff396a217f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,93c22207-5753-42eb-a479-b971a80b3137_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c4e74e75-9f6e-4b1d-bc01-878b823a6b76_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,26755930-7ed3-417c-81ac-bf93fbe51e57_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a770df9c-1ff2-4922-87af-d0d769378c2e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d3ffb188-6155-4f23-8666-2ae6d171f3e9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,65c55bd6-158a-4753-8aa9-03b4574f93ce_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,42d3fa69-20b5-44f3-a1b8-5f8828bbb9c9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f015f8a5-ee1c-439a-b545-8e50d45e95bc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,04a1a309-bc60-4f2d-9e5e-5d852a9ffa63_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4e1e3d94-79c5-4c3f-ac6b-e9bff29c4f0b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,55c3d882-5e9f-4db6-99b0-6b20ef388a69_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e5ab2419-aa6a-4222-9c49-393d9823f6d6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,71a6d543-9b9b-4e23-8e05-8e8d81f0fd7e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4cc9528a-882d-4cdd-b43e-eb120b0050b2_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,70799645-2100-44d8-a37a-a1ef6115a9a8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,26964221-d113-4c24-ac92-4e870f647dd1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,abd48a12-9e16-46ca-a1db-0cd30e3899bc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0e768d9c-0e6d-4ead-846a-dc732624be65_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5ebea5b7-4667-4c77-b34b-1ec2fdb41e87_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fcd35a53-64c6-47e6-a9ab-8b0c56aff4ef_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,593f01e4-86b2-4e38-a9d9-5c617703fe97_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,620fa76b-9033-418d-996e-d2da91f36e4d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,96ce7f94-4ecd-42e1-a96f-16d5eb4cfbb2_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b965d1a7-d911-428e-98cf-de78a25b4cf6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,39f07181-4e67-4a0b-aa13-a625789834fa_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1353dd10-b088-4ae0-9426-dd09a54af7cb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2ab5aed4-b8eb-455b-b8ad-55566c9592d7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9ba73764-bb52-44c0-b5eb-f272b89aa2f5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8fb38f2d-8a73-4572-9694-72d85478c6f3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,65b6f6e0-c5f9-45f6-8cdb-2fa892cf0b31_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3fa68746-d403-45b0-9440-3820ab2abbe9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,68ffa111-c0ef-411b-9fd5-e8ac0bee1771_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d16b49af-3113-4a4c-a709-e860c8810eaa_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c31e4305-5d8c-473e-b4e9-bad6af132857_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,773a1004-6aec-4196-98f6-135a08c6fe74_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fbdab11a-70b7-46dc-91e4-9228cb9d15e6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c590e698-eebf-448b-9cce-a7cce759a2eb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fc0cf5ba-1294-4d15-9671-620d0f50c80e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0327e908-54f3-470d-9300-fbf9d58fcfc8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,554b305d-6112-446a-8de1-24da5da45847_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b3fb2842-a6ba-4208-8203-78b8bf6b380b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a87e2d54-4801-425a-a2ff-085d199d2165_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e5390536-1946-421f-8427-fe1440403ea5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7475a216-56da-44c7-92f6-ff1368ff8167_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9795c4f7-21c4-4876-9ab7-d1c717fc2985_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,50ca9829-300a-4bf6-96e5-33f1fb81d4d9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3b2ce259-00a1-4fd2-b4d5-8e1a6915cced_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e43a557e-56d2-4d4d-8fbe-dd54140842d6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1eb5e1ee-ef00-446f-bf8e-e6e86d1c0911_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9f0b8522-96bf-41eb-b3af-703530077f72_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,522e4345-f690-4b76-8ef2-fbce695debfa_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f77c9673-1b3e-416d-b935-e0b0cade1ddb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7fcb1dcb-8cb3-40f0-a965-dcbfa6cdce0c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e7accb29-1624-4731-8ada-666be2bc9ca4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3f92004a-6fc8-4a31-a427-6a7035d9cf02_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,13b4cf1a-37e2-4cdd-b08d-14352cef6ba7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9174da20-ff65-4875-8a36-7bb201a71ac3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3bcea802-91c8-418c-9f1a-c136ed4513cd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,32c27158-2a5f-4ce2-acf8-be57fe7087fe_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c924ea03-1721-4db9-a432-bf917b18e57e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6325fe16-5f6d-45fa-ad4b-83ace4b371a7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d53d61a3-be3b-4a59-9051-3372768ebb7b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cc8f9cce-b55f-4d54-8e65-8481fe6c1f1e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9c73db00-24fe-4d1b-8099-18f47089acf1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,37fc21cb-d23e-45e5-9959-82135d658ba6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4ebf4c19-0401-4d2f-b578-692c6962840b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,18e76570-2d08-4457-bf8a-c3a8d14fe346_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fd0b3cb1-bfc4-4679-a320-2277c2cf7181_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,01336277-976b-41c4-92b6-507f3716fa7b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cde297e1-54ee-44f5-99ad-23938b40ef89_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3e42e429-23ce-42fc-be9a-77b99d742cd2_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9b7eb529-d04e-4301-9078-fb02b79ee6e7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ef21449b-bde0-46f1-91a0-720a2c9921bb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fa856b69-08e0-4e99-8317-b2abdf007444_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e557faae-63d7-4439-b32b-d11cdedd1820_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a749af27-1fdd-4a6f-99f4-dbb9f05ba51b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,54a3ee50-46fc-492e-83f8-e3be5cf34a8f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,68ff4d3c-e1c5-443a-a575-6ba47c77c517_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,af10fbe8-8608-4801-890c-f6c01efbe0bb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0d0901b3-daeb-4b16-8915-fdf6fc0d0521_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f1a05918-9c80-4591-b889-a338feedd0d4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,627f58cb-59ee-4846-991a-45e16c7cc3b8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fe58f033-823e-473d-8234-8ac5dc22f347_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2a93d54b-2c70-4f8e-935e-ffea440a4324_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,633f943b-f8a5-4702-8bc2-39df981ace6f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,82e48289-28f3-4fca-89fe-ca0655b8db87_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2f0894c1-2c82-4ebf-b8d6-07987369ece7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f63e4258-a3fb-4383-ad43-d415ca032b2a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1568c709-b306-46df-8630-73d7aab57a9c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8f4bdef7-6301-44e3-884f-1e037ee4ea4c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d504551c-e608-43d2-be64-1c30067d3396_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,684c4d40-8f74-4965-a18c-8204c6f2d466_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,54e0a050-e716-40ba-bc97-ae80d0df9c8e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0a443e4c-8b18-4f6c-99f9-9fddabd13986_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3e1838b1-3485-421f-8806-57e14f46f00f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7737dd48-a89a-4fe7-ba95-29445391baed_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d4cb47b7-ac3f-4034-bc7f-021b62ee7612_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,09f6b0dd-fdf5-4674-888e-5e75291a4181_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fb45b9f6-e3fb-440e-9459-b15df356372b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a355978e-abc2-4f1a-83f5-20e8d4189e91_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,aa11b8f2-1c33-404c-8227-1d27fa948a01_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,53a09ffb-b8ef-4be9-9363-6940aadbf414_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e29fc1b0-db4d-4f89-88d1-bdecef46546f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7157d5dd-464a-4143-941a-c99d1ed25886_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f9a1f806-0ced-4b22-9b33-6e52296f47d9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,208c6e8c-ad86-4e69-9805-8342de977835_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,682feb2e-563c-407a-9480-32a260c819b3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b9ae064d-e76a-44e6-aab5-2c0de2d4aea7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9be76f8a-ce7a-4d15-bd3b-2d182a265c1a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7b5e7343-be40-4505-844a-7cd763358215_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,57b8dfa5-154a-4e6e-8409-5de1e4d3e12e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,df135a7e-1cf6-487e-b231-d0130cbb3e94_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,99641569-0e73-482b-a4b9-c880ec2ca281_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,68c903e3-c0a5-4cc0-bec1-8f86f98b78ba_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,972456e7-148a-4849-8d1a-33d12ee57fac_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5f5a112c-c38d-4973-acf1-4aefbb6608cd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2cbead31-3ad5-4533-97a9-57169ae2d18d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,67098888-0feb-41f6-9dc0-49b11436eef3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1e28984d-b120-45b0-ad4e-91b563186e2f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,50a2a72b-ea21-4ee8-8956-0d35ae1eb2e2_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,07413001-9767-4a53-94bc-d911ad6bf9fa_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6f93032a-936f-4f70-8e6c-cd4935dce538_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,492066de-7eda-4ea2-bed7-84f498138b82_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,be1b0d20-b721-4d91-beb4-b34a2c30ea92_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ef1b59bd-5245-4e24-ab34-6bcbdedc69a7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,02fa6adf-4cc6-4f99-8368-08da9b78885a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cd6e7a8d-468b-43c1-95ae-8d869691787f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ce7127d6-99fd-42be-b86c-1fcc9c1dc0c6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a96337dc-fa05-4968-9ba0-33a981278e37_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,542e43fb-2a43-4185-b308-5967ad9d9ac2_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5137ca31-2ac8-41e9-ada9-4dc20f3839bf_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,36618a76-f6a9-45e9-a097-1b50a236afe6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,06f1c21c-9509-49e9-8032-7f83e426df55_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,058a8510-05ea-4d44-a12d-91b759ff50dd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,bee33dd6-dea6-4a82-a30c-362adb0045c1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,736fb544-c839-48d1-bcb6-aac4c0b8f995_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a98acfba-1f27-448b-a625-42263b985cdd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,71de8115-0290-42f9-8101-fd01953bb93b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2913194c-f136-429c-8414-9f06156334e5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,44e6daf0-db3f-4210-92f7-8e39054584b1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,63140bbb-ad8a-429c-b66c-9f08a8cc2d6b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4630a2a0-69ef-4558-896f-402d4820aa1f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9a04b295-8738-40ee-a9a9-155abe853cd4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b0159d0d-42e9-4fee-9e88-3711bfa5180d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1f295a0c-1ca6-40b3-844e-cdff57520cc6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2890a3d5-0b79-48b9-874f-f157911b7bda_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f4f56e08-58d6-4bfa-a747-68f0ab436afb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,612889ab-8e79-465a-9e76-ff881f74932c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0df9c398-9133-41ca-a8d8-6dd1df3475c8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,bffbeb98-b8b1-4186-9ebe-440a3533a5d7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6d4119b2-8139-4a60-9080-cb5dc22b58ef_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,600ae3a4-d4fc-4146-9002-d225ffdb25aa_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9b1acfe1-28e1-4d8d-935d-f99d1cd23c8b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d2d82597-b8a5-4ee7-a3e2-16df33095de9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,81174703-1336-408b-b740-1566e032df53_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,747cece6-1b2b-47b6-850a-143b92b034ea_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b5f5c68d-d68d-4b53-8c2c-8daa765dc252_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d74fbcd9-6662-49c5-a809-9d892ae17c37_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d19bdb3a-07eb-497b-af16-0700a2ac0bad_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2ff2d259-85ca-4227-a581-4270d1e2188f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,153fdbf9-5594-4425-a05c-5e804c01ecfb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d23e27c8-4696-4c59-9db4-e670acc8b922_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,76b04d5f-68c8-41d1-91f5-bd5e683c69b8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fcc406ac-80a4-436c-af81-da5659b0ba65_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,14c0d716-4b07-4861-8108-8a98ee259f5c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8b81d43a-88c4-47dd-af1f-a31795429ffb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,03bdbb2f-95d8-40c2-be26-06b3153849a1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,25828ad6-ce19-4869-ab00-f50c8af89912_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f043d6bf-1e27-4700-a099-bd6e1718bfc3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ed98162b-984d-48b5-9069-4e4676c220dd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,74cf3342-d8ff-44c4-9b6c-82f32804fbb5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,06857595-9a09-4ba9-b520-03962f211782_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a98cfd91-a351-49cf-a901-2b731aed3d17_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e2d0c4bf-c496-4637-9b36-c9989a8b26e0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ddbd3cb5-b244-425a-a975-94fb8eba5a85_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a710a888-42f2-4885-bfe7-dfb95b62494e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e7cba260-b7dc-43b8-922f-5686a7daa57a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7b3ce677-ce27-4a26-b08a-361054209447_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,be4040a8-1ab6-4e07-bfe6-b054fe1e26d3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e33a1a7e-9a38-451a-b1b7-a7677059e691_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a855a42c-f254-4749-a2e5-1053d5518ea9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f6af618a-c667-4b57-b67f-947b0f0e5956_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f68904d3-50f9-424f-a6a8-d643886d01f4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,28ea697d-8e0c-417d-bd46-f0088c160bf0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6d74a029-4569-44ec-9e70-68d78cb7386c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2d8a6b94-7bd2-4f80-b60c-5a9f4aafda2c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,62f6b1f4-e06b-4106-bf34-fbaa637b5edd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cefb2ed0-5f56-4884-9444-5343e6ff248c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3da72380-7057-489e-88a9-77160abc124d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2bc98bbc-66e3-4d7d-b113-619cc4ff6ff7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,af213802-4a8f-44d8-8a9a-db80e39ecb74_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e43c85ac-14e8-4a08-b575-ce9220cd448a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9279d3f2-270b-4d23-a653-02573d4a1e85_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ARCS3PDF-0e73-473b-8502-71793ea55769_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2dca6fc4-fdfb-42f6-a18c-bfa6b1101ad6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4a4cf004-7f89-4362-869c-2b3fbb0c5266_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,18857150-a6f4-4315-8296-bd15ee8b0445_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,646d810f-4100-45da-96b6-47be0c3258ce_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3239c2d2-7efe-43bd-8c5f-29e3c5f0b81f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b8ff4cbc-a503-427d-bd4f-9ddf74210440_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,681407d2-10e2-41ce-87aa-d96c55c41743_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,82007298-16a8-41a0-876c-a018e24c40b0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e558703f-7d1c-45b4-b322-79fa83fc6b19_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,01d78f0a-4f3c-4c14-b47e-8a602640e04f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,915a35fb-8be9-47d9-92b0-130d7303df59_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c449e02d-cc9a-4737-b20c-fb5f8ba0fbfc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,62d58ec0-78bf-408b-b6a3-8f0a1477e2a8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl' + } + + false + build-report + + + + + + + update IIS action sets + + + + + + + create the INFERRED graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsIISGraph', + 'inputGraphRootPath' : 'resolvedGraphPath', + 'outputGraphRootPath' : 'inferredGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_experiment/working_dir/promoteActionsIIS' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'inferredGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/beta_experiment/working_dir/dedup', + 'whiteListPath' : '/data/dedup/whitelist_beta', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '7G', + 'sparkDriverMemory' : '6G' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'dedupGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/beta_experiment/working_dir/dedup' + } + + build-report + + + + + + + propagates ORCID among results linked by allowedsemrels semantic relationships + + executeOozieJob + IIS + + { + 'sourcePath' : 'consistentGraphPath', + 'outputPath': 'orcidGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/orcidtoresultfromsemrel/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/orcid', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo;isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark results respecting some rules as belonging to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'orcidGraphPath', + 'outputPath': 'bulkTaggingGraphPath', + 'isLookUpUrl' : 'isLookUpUrl', + 'pathMap' : 'bulkTaggingPathMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/bulktag/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/bulktag' + } + + build-report + + + + + + + creates relashionships between results and organizations when the organizations are associated to institutional repositories + + executeOozieJob + IIS + + { + 'sourcePath' : 'bulkTaggingGraphPath', + 'outputPath': 'affiliationGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/affiliation/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/affiliation', + 'saveGraph' : 'true', + 'blacklist' : 'empty' + } + + build-report + + + + + + + creates relashionships between results and organizations when the organizations are in parent/child relationships. The childs to the parents + + executeOozieJob + IIS + + { + 'sourcePath' : 'affiliationGraphPath', + 'outputPath': 'affiliationSemRelGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/affiliation_semrel/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/affiliationsemrel' + } + + build-report + + + + + + + marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap + + executeOozieJob + IIS + + { + 'sourcePath' : 'affiliationSemRelGraphPath', + 'outputPath': 'communityOrganizationGraphPath', + 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_organization/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/community_organization', + 'saveGraph' : 'true' + } + + build-report + + + + + + + created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects + + executeOozieJob + IIS + + { + 'sourcePath' : 'communityOrganizationGraphPath', + 'outputPath': 'fundingGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/funding/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/funding', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'fundingGraphPath', + 'outputPath': 'communitySemRelGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_semrel/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/community_semrel', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from + + executeOozieJob + IIS + + { + 'sourcePath' : 'communitySemRelGraphPath', + 'outputPath': 'countryGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/country/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'workingDir' : '/tmp/beta_experiment/working_dir/country', + 'allowedtypes' : 'pubsrepository::institutional', + 'whitelist':'10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48', + 'saveGraph' : 'true' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'countryGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/clean' + } + + build-report + + + + + + + removes blacklisted relations + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedGraphPath', + 'outputPath': 'blacklistedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/blacklist/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/blacklist', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '' + } + + build-report + + + + + + + + wf_20211206_093743_83 + 2021-12-06T10:12:32+00:00 + SUCCESS + + + +
From 92fd69e25da548827d857b08371cb99ffd7f6aaa Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 3 Jan 2022 15:23:06 +0100 Subject: [PATCH 122/176] [SDG-FOS] alternative way to get input data to avoid OOM error while getting csv --- .../dnetlib/dhp/actionmanager/Constants.java | 7 +- .../GetFOSSparkJob.java | 91 +++++++++ .../GetInputData.java | 80 -------- .../GetSDGSparkJob.java | 91 +++++++++ .../PrepareSDGSparkJob.java | 119 ++++++------ .../model/SDGDataModel.java | 59 +++--- .../get_input_parameters.json | 14 +- .../oozie_app/workflow.xml | 46 ++++- .../createunresolvedentities/PrepareTest.java | 168 +++++++---------- .../createunresolvedentities/ProduceTest.java | 174 +++++++++--------- 10 files changed, 468 insertions(+), 381 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetInputData.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java index ef4ef6756..3a46228d8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java @@ -19,7 +19,7 @@ public class Constants { public static final String DOI = "doi"; - public static final char DEFAULT_DELIMITER = ','; + public static final String DEFAULT_DELIMITER = ","; public static final String UPDATE_DATA_INFO_TYPE = "update"; public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos"; @@ -55,7 +55,8 @@ public class Constants { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } - public static StructuredProperty getSubject(String sbj, String classid, String classname, String diqualifierclassid) { + public static StructuredProperty getSubject(String sbj, String classid, String classname, + String diqualifierclassid) { if (sbj.equals(NULL)) return null; StructuredProperty sp = new StructuredProperty(); @@ -78,7 +79,7 @@ public class Constants { false, OafMapperUtils .qualifier( - diqualifierclassid, + diqualifierclassid, UPDATE_CLASS_NAME, ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS), diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java new file mode 100644 index 000000000..75fe42e90 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java @@ -0,0 +1,91 @@ + +package eu.dnetlib.dhp.actionmanager.createunresolvedentities; + +import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER; +import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetFOSSparkJob implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(GetFOSSparkJob.class); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + GetFOSSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + // the path where the original fos csv file is stored + final String sourcePath = parser.get("sourcePath"); + log.info("sourcePath {}", sourcePath); + + // the path where to put the file as json + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}", outputPath); + + final String delimiter = Optional + .ofNullable(parser.get("delimiter")) + .orElse(DEFAULT_DELIMITER); + + SparkConf sconf = new SparkConf(); + runWithSparkSession( + sconf, + isSparkSessionManaged, + spark -> { + getFOS( + spark, + sourcePath, + outputPath, + delimiter); + }); + } + + private static void getFOS(SparkSession spark, String sourcePath, String outputPath, String delimiter) { + Dataset fosData = spark + .read() + .format("csv") + .option("sep", delimiter) + .option("inferSchema", "true") + .option("header", "true") + .option("quotes", "\"") + .load(sourcePath); + + fosData.map((MapFunction) r -> { + FOSDataModel fosDataModel = new FOSDataModel(); + fosDataModel.setDoi(r.getString(0).toLowerCase()); + fosDataModel.setLevel1(r.getString(1)); + fosDataModel.setLevel2(r.getString(2)); + fosDataModel.setLevel3(r.getString(3)); + return fosDataModel; + }, Encoders.bean(FOSDataModel.class)) + .write() + .mode(SaveMode.Overwrite) + .json(outputPath); + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetInputData.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetInputData.java deleted file mode 100644 index 499e42a8e..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetInputData.java +++ /dev/null @@ -1,80 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.createunresolvedentities; - -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.Serializable; -import java.util.Objects; -import java.util.Optional; -import java.util.zip.GZIPInputStream; - -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - -import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER; - -public class GetInputData implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(GetInputData.class); - - - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Objects - .requireNonNull( - GetInputData.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json")))); - - parser.parseArgument(args); - - // the path where the original fos csv file is stored - final String sourcePath = parser.get("sourcePath"); - log.info("sourcePath {}", sourcePath); - - // the path where to put the file as json - final String outputPath = parser.get("outputPath"); - log.info("outputPath {}", outputPath); - - final String hdfsNameNode = parser.get("hdfsNameNode"); - log.info("hdfsNameNode {}", hdfsNameNode); - - final String classForName = parser.get("classForName"); - log.info("classForName {}", classForName); - - final char delimiter = Optional - .ofNullable(parser.get("delimiter")) - .map(s -> s.charAt(0)) - .orElse(DEFAULT_DELIMITER); - log.info("delimiter {}", delimiter); - - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); - - FileSystem fileSystem = FileSystem.get(conf); - - new GetInputData().doRewrite(sourcePath, outputPath, classForName, delimiter, fileSystem); - - } - - public void doRewrite(String inputPath, String outputFile, String classForName, char delimiter, FileSystem fs) - throws IOException, ClassNotFoundException { - - - // reads the csv and writes it as its json equivalent - try (InputStreamReader reader = new InputStreamReader(new GZIPInputStream(fs.open(new Path(inputPath))))) { - eu.dnetlib.dhp.common.collection.GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter); - } - - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java new file mode 100644 index 000000000..328075389 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java @@ -0,0 +1,91 @@ + +package eu.dnetlib.dhp.actionmanager.createunresolvedentities; + +import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER; +import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel; +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetSDGSparkJob implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(GetSDGSparkJob.class); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + GetSDGSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + // the path where the original fos csv file is stored + final String sourcePath = parser.get("sourcePath"); + log.info("sourcePath {}", sourcePath); + + // the path where to put the file as json + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}", outputPath); + + final String delimiter = Optional + .ofNullable(parser.get("delimiter")) + .orElse(DEFAULT_DELIMITER); + + SparkConf sconf = new SparkConf(); + runWithSparkSession( + sconf, + isSparkSessionManaged, + spark -> { + getSDG( + spark, + sourcePath, + outputPath, + delimiter); + }); + } + + private static void getSDG(SparkSession spark, String sourcePath, String outputPath, String delimiter) { + Dataset sdgData = spark + .read() + .format("csv") + .option("sep", delimiter) + .option("inferSchema", "true") + .option("header", "true") + .option("quotes", "\"") + .load(sourcePath); + + sdgData.map((MapFunction) r -> { + SDGDataModel sdgDataModel = new SDGDataModel(); + sdgDataModel.setDoi(r.getString(0).toLowerCase()); + sdgDataModel.setSbj(r.getString(1)); + + return sdgDataModel; + }, Encoders.bean(SDGDataModel.class)) + .filter((FilterFunction) sdg -> sdg.getSbj() != null) + .write() + .mode(SaveMode.Overwrite) + .json(outputPath); + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java index b0607ead5..27da77c0c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java @@ -1,11 +1,13 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import eu.dnetlib.dhp.utils.DHPUtils; +import static eu.dnetlib.dhp.actionmanager.Constants.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -16,71 +18,72 @@ import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import static eu.dnetlib.dhp.actionmanager.Constants.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.utils.DHPUtils; public class PrepareSDGSparkJob implements Serializable { - private static final Logger log = LoggerFactory.getLogger(PrepareSDGSparkJob.class); + private static final Logger log = LoggerFactory.getLogger(PrepareSDGSparkJob.class); - public static void main(String[] args) throws Exception { + public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - PrepareSDGSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json")); + String jsonConfiguration = IOUtils + .toString( + PrepareSDGSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json")); - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); + parser.parseArgument(args); - Boolean isSparkSessionManaged = isSparkSessionManaged(parser); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String sourcePath = parser.get("sourcePath"); - log.info("sourcePath: {}", sourcePath); + String sourcePath = parser.get("sourcePath"); + log.info("sourcePath: {}", sourcePath); - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - doPrepare( - spark, - sourcePath, - - outputPath); - }); - } - - private static void doPrepare(SparkSession spark, String sourcePath, String outputPath) { - Dataset sdgDataset = readPath(spark, sourcePath, SDGDataModel.class); - - - sdgDataset.groupByKey((MapFunction)r -> r.getDoi().toLowerCase(),Encoders.STRING()) - .mapGroups((MapGroupsFunction)(k,it) -> { - Result r = new Result(); - r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI)); - SDGDataModel first = it.next(); - Listsbjs = new ArrayList<>(); - sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID)); - it.forEachRemaining(s -> sbjs.add(getSubject(s.getSbj(),SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID))); - r.setSubject(sbjs); - return r; - },Encoders.bean(Result.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "/sdg"); - } + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + doPrepare( + spark, + sourcePath, + outputPath); + }); + } + private static void doPrepare(SparkSession spark, String sourcePath, String outputPath) { + Dataset sdgDataset = readPath(spark, sourcePath, SDGDataModel.class); + sdgDataset + .groupByKey((MapFunction) r -> r.getDoi().toLowerCase(), Encoders.STRING()) + .mapGroups((MapGroupsFunction) (k, it) -> { + Result r = new Result(); + r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI)); + SDGDataModel first = it.next(); + List sbjs = new ArrayList<>(); + sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID)); + it + .forEachRemaining( + s -> sbjs + .add(getSubject(s.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID))); + r.setSubject(sbjs); + return r; + }, Encoders.bean(Result.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/sdg"); + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java index 7b1584e3f..98ba5045c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java @@ -1,48 +1,47 @@ -package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; -import com.opencsv.bean.CsvBindByPosition; +package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; import java.io.Serializable; -public class SDGDataModel implements Serializable{ +import com.opencsv.bean.CsvBindByPosition; - @CsvBindByPosition(position = 0) +public class SDGDataModel implements Serializable { + + @CsvBindByPosition(position = 0) // @CsvBindByName(column = "doi") - private String doi; + private String doi; - @CsvBindByPosition(position = 1) + @CsvBindByPosition(position = 1) // @CsvBindByName(column = "sdg") - private String sbj; + private String sbj; + public SDGDataModel() { - public SDGDataModel() { + } - } + public SDGDataModel(String doi, String sbj) { + this.doi = doi; + this.sbj = sbj; - public SDGDataModel(String doi, String sbj) { - this.doi = doi; - this.sbj = sbj; + } - } + public static SDGDataModel newInstance(String d, String sbj) { + return new SDGDataModel(d, sbj); + } - public static SDGDataModel newInstance(String d, String sbj) { - return new SDGDataModel(d, sbj); - } + public String getDoi() { + return doi; + } - public String getDoi() { - return doi; - } + public void setDoi(String doi) { + this.doi = doi; + } - public void setDoi(String doi) { - this.doi = doi; - } + public String getSbj() { + return sbj; + } - - public String getSbj() { - return sbj; - } - - public void setSbj(String sbj) { - this.sbj = sbj; - } + public void setSbj(String sbj) { + this.sbj = sbj; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json index 050a25677..5a6a63774 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json @@ -19,15 +19,9 @@ "paramRequired": false }, { - "paramName": "hnn", - "paramLongName": "hdfsNameNode", - "paramDescription": "the path used to store the HostedByMap", - "paramRequired": true - }, - { - "paramName": "cfn", - "paramLongName": "classForName", - "paramDescription": "the path used to store the HostedByMap", - "paramRequired": true + "paramName": "d", + "paramLongName": "delimiter", + "paramDescription": "the delimiter if different from the default one (,)", + "paramRequired": false } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml index 9451af572..a80bf4fbd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml @@ -107,17 +107,30 @@
- - eu.dnetlib.dhp.actionmanager.createunresolvedentities.GetInputData - --hdfsNameNode${nameNode} + + yarn + cluster + Gets Data from FOS csv file + eu.dnetlib.dhp.actionmanager.createunresolvedentities.GetFOSSparkJob + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + --sourcePath${fosPath} --outputPath${workingDir}/input/fos - --classForNameeu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel - + + yarn @@ -144,17 +157,30 @@ - - eu.dnetlib.dhp.actionmanager.createunresolvedentities.GetInputData - --hdfsNameNode${nameNode} + + yarn + cluster + Gets Data from SDG csv file + eu.dnetlib.dhp.actionmanager.createunresolvedentities.GetSDGSparkJob + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + --sourcePath${sdgPath} --outputPath${workingDir}/input/sdg - --classForNameeu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel - + + yarn diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index 17137c39a..166430c2f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -10,7 +10,6 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.stream.Collectors; -import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -26,6 +25,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel; +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel; import eu.dnetlib.dhp.schema.oaf.Result; public class PrepareTest { @@ -148,37 +148,6 @@ public class PrepareTest { } - @Test - void getFOSFileTest() throws IOException, ClassNotFoundException { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.csv") - .getPath(); - final String outputPath = workingDir.toString() + "/fos.json"; - - new GetInputData() - .doRewrite( - sourcePath, outputPath, "eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel", - ',', fs); - - BufferedReader in = new BufferedReader( - new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath)))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - FOSDataModel fos = new ObjectMapper().readValue(line, FOSDataModel.class); - - System.out.println(new ObjectMapper().writeValueAsString(fos)); - count += 1; - } - - assertEquals(39, count); - - } - - - @Test void fosPrepareTest() throws Exception { final String sourcePath = getClass() @@ -206,7 +175,6 @@ public class PrepareTest { assertEquals(20, tmp.count()); assertEquals(1, tmp.filter(row -> row.getId().equals(doi1)).count()); - assertTrue( tmp .filter(r -> r.getId().equals(doi1)) @@ -249,105 +217,101 @@ public class PrepareTest { } - @Test - void getSDGFileTest() throws IOException, ClassNotFoundException { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg_sbs.csv") - .getPath(); - final String outputPath = workingDir.toString() + "/sdg.json"; - - new GetInputData() - .doRewrite( - sourcePath, outputPath, "eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel", - ',', fs); - - BufferedReader in = new BufferedReader( - new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath)))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - SDGDataModel sdg = new ObjectMapper().readValue(line, SDGDataModel.class); - - System.out.println(new ObjectMapper().writeValueAsString(sdg)); - count += 1; - } - - assertEquals(37, count); - - } - @Test void sdgPrepareTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json") - .getPath(); + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json") + .getPath(); PrepareSDGSparkJob - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", sourcePath, + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, - "-outputPath", workingDir.toString() + "/work" + "-outputPath", workingDir.toString() + "/work" - }); + }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/work/sdg") - .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + .textFile(workingDir.toString() + "/work/sdg") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); String doi1 = "unresolved::10.1001/amaguidesnewsletters.2019.sepoct02::doi"; assertEquals(32, tmp.count()); assertEquals(1, tmp.filter(row -> row.getId().equals(doi1)).count()); - assertTrue( - tmp - .filter(r -> r.getId().equals(doi1)) - .flatMap(r -> r.getSubject().iterator()) - .map(sbj -> sbj.getValue()) - .collect() - .contains("3. Good health")); + tmp + .filter(r -> r.getId().equals(doi1)) + .flatMap(r -> r.getSubject().iterator()) + .map(sbj -> sbj.getValue()) + .collect() + .contains("3. Good health")); assertTrue( - tmp - .filter(r -> r.getId().equals(doi1)) - .flatMap(r -> r.getSubject().iterator()) - .map(sbj -> sbj.getValue()) - .collect() - .contains("8. Economic growth")); - + tmp + .filter(r -> r.getId().equals(doi1)) + .flatMap(r -> r.getSubject().iterator()) + .map(sbj -> sbj.getValue()) + .collect() + .contains("8. Economic growth")); } - @Disabled + @Test - void test2() throws Exception { + void test3() throws Exception { + final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_fos_results_20_12_2021.csv.gz"; + + final String outputPath = workingDir.toString() + "/fos.json"; + GetFOSSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, + + "-outputPath", outputPath + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(outputPath) + .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class)); + + tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); + tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null)); + tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null)); + tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null)); + + } + + @Test + void test4() throws Exception { final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_sdg_results_20_12_21.csv.gz"; - final String outputPath = workingDir.toString() + "/sdg.json"; + GetSDGSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, - new GetInputData() - .doRewrite( - sourcePath, outputPath, "eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel", - ',', fs); + "-outputPath", outputPath - BufferedReader in = new BufferedReader( - new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath)))); + }); - String line; - int count = 0; - while ((line = in.readLine()) != null) { - SDGDataModel sdg = new ObjectMapper().readValue(line, SDGDataModel.class); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - System.out.println(new ObjectMapper().writeValueAsString(sdg)); - count += 1; - } + JavaRDD tmp = sc + .textFile(outputPath) + .map(item -> OBJECT_MAPPER.readValue(item, SDGDataModel.class)); + tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); + tmp.foreach(t -> Assertions.assertTrue(t.getSbj() != null)); } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index 28fce4adb..02c6582f1 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -7,7 +7,6 @@ import java.nio.file.Path; import java.util.List; import java.util.stream.Collectors; -import eu.dnetlib.dhp.actionmanager.Constants; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -25,6 +24,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.Constants; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; @@ -349,60 +349,58 @@ public class ProduceTest { } - private JavaRDD getResultJavaRDDPlusSDG() throws Exception { final String bipPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json") - .getPath(); + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json") + .getPath(); PrepareBipFinder - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", bipPath, - "--outputPath", workingDir.toString() + "/work" + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", bipPath, + "--outputPath", workingDir.toString() + "/work" - }); + }); final String fosPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json") - .getPath(); + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json") + .getPath(); PrepareFOSSparkJob - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", fosPath, - "-outputPath", workingDir.toString() + "/work" - }); + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", fosPath, + "-outputPath", workingDir.toString() + "/work" + }); final String sdgPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json") - .getPath(); + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json") + .getPath(); PrepareSDGSparkJob - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", sdgPath, - "-outputPath", workingDir.toString() + "/work" - }); + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sdgPath, + "-outputPath", workingDir.toString() + "/work" + }); SparkSaveUnresolved.main(new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", workingDir.toString() + "/work", + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", workingDir.toString() + "/work", - "-outputPath", workingDir.toString() + "/unresolved" + "-outputPath", workingDir.toString() + "/unresolved" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); return sc - .textFile(workingDir.toString() + "/unresolved") - .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + .textFile(workingDir.toString() + "/unresolved") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); } - @Test void produceTestSomeNumbersWithSDG() throws Exception { @@ -414,19 +412,19 @@ public class ProduceTest { Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count()); Assertions - .assertEquals( - 50, tmp - .filter(row -> !row.getId().equals(doi)) - .filter(row -> row.getSubject() != null) - .count()); + .assertEquals( + 50, tmp + .filter(row -> !row.getId().equals(doi)) + .filter(row -> row.getSubject() != null) + .count()); Assertions - .assertEquals( - 85, - tmp - .filter(row -> !row.getId().equals(doi)) - .filter(r -> r.getInstance() != null && r.getInstance().size() > 0) - .count()); + .assertEquals( + 85, + tmp + .filter(row -> !row.getId().equals(doi)) + .filter(r -> r.getInstance() != null && r.getInstance().size() > 0) + .count()); } @@ -437,35 +435,35 @@ public class ProduceTest { JavaRDD tmp = getResultJavaRDDPlusSDG(); Assertions - .assertEquals( - 7, tmp - .filter(row -> row.getId().equals(doi)) - .collect() - .get(0) - .getSubject() - .size()); + .assertEquals( + 7, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getSubject() + .size()); List sbjs = tmp - .filter(row -> row.getId().equals(doi)) - .flatMap(row -> row.getSubject().iterator()) - .collect(); + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getSubject().iterator()) + .collect(); Assertions - .assertEquals( - true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences"))); + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences"))); Assertions - .assertEquals( - true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology"))); + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology"))); Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("040502 food science"))); Assertions - .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences"))); + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences"))); Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0303 health sciences"))); Assertions - .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics"))); + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics"))); Assertions - .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("1. No poverty"))); + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("1. No poverty"))); } @@ -475,25 +473,25 @@ public class ProduceTest { JavaRDD tmp = getResultJavaRDDPlusSDG(); List sbjs_sdg = tmp - .filter(row -> row.getSubject() != null && row.getSubject().size() > 0) - .flatMap(row -> row.getSubject().iterator()) - .filter(sbj -> sbj.getQualifier().getClassid().equals(Constants.SDG_CLASS_ID)) - .collect(); + .filter(row -> row.getSubject() != null && row.getSubject().size() > 0) + .flatMap(row -> row.getSubject().iterator()) + .filter(sbj -> sbj.getQualifier().getClassid().equals(Constants.SDG_CLASS_ID)) + .collect(); sbjs_sdg.forEach(sbj -> Assertions.assertEquals("SDG", sbj.getQualifier().getClassid())); sbjs_sdg - .forEach( - sbj -> Assertions - .assertEquals( - "Sustainable Development Goals", sbj.getQualifier().getClassname())); + .forEach( + sbj -> Assertions + .assertEquals( + "Sustainable Development Goals", sbj.getQualifier().getClassname())); sbjs_sdg - .forEach( - sbj -> Assertions - .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid())); + .forEach( + sbj -> Assertions + .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid())); sbjs_sdg - .forEach( - sbj -> Assertions - .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename())); + .forEach( + sbj -> Assertions + .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename())); sbjs_sdg.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference())); sbjs_sdg.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred())); @@ -501,23 +499,23 @@ public class ProduceTest { sbjs_sdg.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust())); sbjs_sdg.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance())); sbjs_sdg - .forEach( - sbj -> Assertions.assertEquals("subject:sdg", sbj.getDataInfo().getProvenanceaction().getClassid())); + .forEach( + sbj -> Assertions.assertEquals("subject:sdg", sbj.getDataInfo().getProvenanceaction().getClassid())); sbjs_sdg - .forEach( - sbj -> Assertions - .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); + .forEach( + sbj -> Assertions + .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); sbjs_sdg - .forEach( - sbj -> Assertions - .assertEquals( - ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); sbjs_sdg - .forEach( - sbj -> Assertions - .assertEquals( - ModelConstants.DNET_PROVENANCE_ACTIONS, - sbj.getDataInfo().getProvenanceaction().getSchemename())); + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, + sbj.getDataInfo().getProvenanceaction().getSchemename())); } } From a6977197b3925c520f76961cfff7b05c37a51d6f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 3 Jan 2022 17:25:26 +0100 Subject: [PATCH 123/176] serialise records in the OAF-store-graph mdstores in json format. Read them again in the graph construction phase using a tolerant parser to support backward compatible changes in the evolution of the schema --- .../dhp/collection/CollectionUtils.scala | 13 ++++++++ .../GenerateDataciteDatasetSpark.scala | 16 +++++----- .../bio/SparkTransformBioDatabaseToOAF.scala | 10 +++--- .../ebi/SparkCreateBaselineDataFrame.scala | 14 ++++---- .../dhp/sx/bio/ebi/SparkEBILinksToOaf.scala | 7 ++-- .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 2 +- .../raw/CopyHdfsOafSparkApplication.scala | 32 ++++++++++++++++--- 7 files changed, 65 insertions(+), 29 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala index 11ecfd6cb..26fcecbbd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala @@ -1,7 +1,9 @@ package eu.dnetlib.dhp.collection +import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.common.ModelSupport import eu.dnetlib.dhp.schema.oaf.{Oaf, OafEntity, Relation} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode} object CollectionUtils { @@ -46,4 +48,15 @@ object CollectionUtils { List() } + def saveDataset(d: Dataset[Oaf], targetPath: String):Unit = { + implicit val resultEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) + val mapper = new ObjectMapper + + d + .flatMap(i => CollectionUtils.fixRelations(i)) + .filter(i => i != null) + .map(r => mapper.writeValueAsString(r))(Encoders.STRING) + .write.mode(SaveMode.Overwrite).save(targetPath) + } + } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala index e1607ee9c..d11c33fb4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala @@ -2,14 +2,14 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.AbstractScalaApplication -import eu.dnetlib.dhp.collection.CollectionUtils.fixRelations +import eu.dnetlib.dhp.collection.CollectionUtils import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.mdstore.{MDStoreVersion, MetadataRecord} import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.utils.DHPUtils.writeHdfsFile import eu.dnetlib.dhp.utils.ISLookupClientFactory -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} @@ -73,12 +73,12 @@ class GenerateDataciteDatasetSpark (propertyPath:String, args:Array[String], log implicit val mrEncoder: Encoder[MetadataRecord] = Encoders.kryo[MetadataRecord] implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - spark.read.load(sourcePath).as[DataciteType] - .filter(d => d.isActive) - .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks)) - .filter(d => d != null) - .flatMap(i => fixRelations(i)).filter(i => i != null) - .write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset( + spark.read.load(sourcePath).as[DataciteType] + .filter(d => d.isActive) + .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks)) + .filter(d => d != null), + targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala index fcceacd44..27caa8f36 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala @@ -6,7 +6,7 @@ import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkTransformBioDatabaseToOAF { @@ -36,13 +36,13 @@ object SparkTransformBioDatabaseToOAF { import spark.implicits._ database.toUpperCase() match { case "UNIPROT" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset(spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))), targetPath) case "PDB" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset(spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))), targetPath) case "SCHOLIX" => - spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset(spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)), targetPath) case "CROSSREF_LINKS" => - spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset(spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))), targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 660a26a6c..0fea4ff7f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -1,8 +1,9 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.collection.CollectionUtils import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup -import eu.dnetlib.dhp.schema.oaf.Result +import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} import eu.dnetlib.dhp.sx.bio.pubmed._ import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils @@ -177,7 +178,7 @@ object SparkCreateBaselineDataFrame { implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle]) implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal]) implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor]) - implicit val resultEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + implicit val resultEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) if (!"true".equalsIgnoreCase(skipUpdate)) { downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri) @@ -192,9 +193,10 @@ object SparkCreateBaselineDataFrame { } val exported_dataset = spark.read.load(s"$workingPath/baseline_dataset").as[PMArticle] - exported_dataset - .map(a => PubMedToOaf.convert(a, vocabularies)).as[Result] - .filter(p => p != null) - .write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset(exported_dataset + .map(a => PubMedToOaf.convert(a, vocabularies)).as[Oaf] + .filter(p => p != null), + targetPath) + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala index 12af4824b..cd03f004d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala @@ -34,10 +34,9 @@ object SparkEBILinksToOaf { val ebLinks: Dataset[EBILinkItem] = spark.read.load(sourcePath).as[EBILinkItem].filter(l => l.links != null && l.links.startsWith("{")) - ebLinks.flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) + CollectionUtils.saveDataset(ebLinks.flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) - .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)) - .flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null) - .write.mode(SaveMode.Overwrite).save(targetPath) + .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)), + targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index 4a93a7cb4..65717adff 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -109,7 +109,7 @@ object PubMedToOaf { * @param vocabularies the vocabularies * @return The OAF instance if the mapping did not fail */ - def convert(article: PMArticle, vocabularies: VocabularyGroup): Result = { + def convert(article: PMArticle, vocabularies: VocabularyGroup): Oaf = { if (article.getPublicationTypes == null) return null diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala index 91d2bafe3..1376c6b35 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala @@ -1,6 +1,6 @@ package eu.dnetlib.dhp.oa.graph.raw -import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.schema.common.ModelSupport @@ -8,7 +8,10 @@ import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.utils.DHPUtils import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.apache.spark.{SparkConf, SparkContext} +import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods.parse import org.slf4j.LoggerFactory + import scala.collection.JavaConverters._ import scala.io.Source @@ -45,18 +48,21 @@ object CopyHdfsOafSparkApplication { log.info("hdfsPath: {}", hdfsPath) implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + import spark.implicits._ val paths = DHPUtils.mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation, true).asScala val validPaths: List[String] = paths.filter(p => HdfsSupport.exists(p, sc.hadoopConfiguration)).toList if (validPaths.nonEmpty) { - val oaf = spark.read.load(validPaths: _*).as[Oaf] - val mapper = new ObjectMapper() - val l = ModelSupport.oafTypes.entrySet.asScala.map(e => e.getKey).toList + val oaf = spark.read.load(validPaths: _*).as[String] + val mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + val l = ModelSupport.oafTypes.entrySet.asScala.toList l.foreach( e => - oaf.filter(o => o.getClass.getSimpleName.equalsIgnoreCase(e)) + oaf + .filter(o => isOafType(o, e.getKey)) + .map(j => mapper.readValue(j, e.getValue).asInstanceOf[Oaf]) .map(s => mapper.writeValueAsString(s))(Encoders.STRING) .write .option("compression", "gzip") @@ -65,4 +71,20 @@ object CopyHdfsOafSparkApplication { ) } } + + def isOafType(input: String, oafType: String): Boolean = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: org.json4s.JValue = parse(input) + if (oafType == "relation") { + val hasSource = (json \ "source").extractOrElse[String](null) + val hasTarget = (json \ "target").extractOrElse[String](null) + + hasSource != null && hasTarget != null + } else { + val hasId = (json \ "id").extractOrElse[String](null) + val resultType = (json \ "resulttype" \ "classid").extractOrElse[String](null) + hasId != null && oafType.equalsIgnoreCase(resultType) + } + + } } From bd59b58efbd3dd65f305bdece42f45a126773dfe Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 4 Jan 2022 11:26:56 +0100 Subject: [PATCH 124/176] test for the tolerant deserialisation utility method --- .../raw/CopyHdfsOafSparkApplicationTest.java | 65 +++++++++++++++++++ .../dnetlib/dhp/oa/graph/raw/dataset_1.json | 3 + .../dhp/oa/graph/raw/publication_1.json | 1 + .../raw/publication_2_unknownProperty.json | 1 + .../dnetlib/dhp/oa/graph/raw/relation_1.json | 6 ++ 5 files changed, 76 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_1.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relation_1.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java new file mode 100644 index 000000000..85cb551bc --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java @@ -0,0 +1,65 @@ + +package eu.dnetlib.dhp.oa.graph.raw; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; + +public class CopyHdfsOafSparkApplicationTest { + + @Test + void testIsOafType() throws IOException { + assertTrue( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/publication_1.json")), + "publication")); + assertTrue( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json")), + "dataset")); + assertTrue( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/relation_1.json")), + "relation")); + + assertFalse( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/publication_1.json")), + "dataset")); + assertFalse( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json")), + "publication")); + + assertTrue( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass() + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json")), + "publication")); + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json new file mode 100644 index 000000000..c22dc94e3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_1.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_1.json new file mode 100644 index 000000000..bd3710234 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_1.json @@ -0,0 +1 @@ +{"author":[{"fullname":"Makkonen, Lasse","name":"Lasse","pid":[],"rank":1,"surname":"Makkonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Watson, Rick"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-01-01"},"dateofcollection":"2021-10-04T12:42:57.502Z","dateoftransformation":"2021-10-04T15:32:51.877Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::0b579a3501cf87921448e0a1c7fc8353","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5a6fbe30-2096-4106-96f4-ed36620d3f73"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635433424020,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-10-26T12:07:44Z","harvestDate":"2021-10-04T12:42:57.502Z","identifier":"oai:cris.vtt.fi:publications/5a6fbe30-2096-4106-96f4-ed36620d3f73","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::0b579a3501cf87921448e0a1c7fc8353","oai:cris.vtt.fi:publications/5a6fbe30-2096-4106-96f4-ed36620d3f73"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Irish Wind Energy Association"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Makkonen , L 1997 , Calculation of ice accretion on wind turbine blades . in R Watson (ed.) , EWEC '97: European Wind Energy Conference : Proceedings of the international conference . Irish Wind Energy Association , Slane , European Wind Energy Conference EWEC '97 , Dublin , Ireland , 6/10/97 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Calculation of ice accretion on wind turbine blades"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json new file mode 100644 index 000000000..364796c24 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json @@ -0,0 +1 @@ +{"id":"50|355e65625b88::0b579a3501cf87921448e0a1c7fc8353", "resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"}, "unknownProperty": "asdasd"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relation_1.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relation_1.json new file mode 100644 index 000000000..31755c53d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relation_1.json @@ -0,0 +1,6 @@ +{"source":"1a","target":"10a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"10a","target":"1a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"2a","target":"20a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"20a","target":"2a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"15a","target":"25a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"25a","target":"15a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} \ No newline at end of file From 045d767013651f3831cfcd0f59a0b73487d3967e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 4 Jan 2022 14:23:01 +0100 Subject: [PATCH 125/176] OAF-store-graph mdstores: save them in text format --- .../scala/eu/dnetlib/dhp/collection/CollectionUtils.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala index 26fcecbbd..509e464e5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala @@ -56,7 +56,10 @@ object CollectionUtils { .flatMap(i => CollectionUtils.fixRelations(i)) .filter(i => i != null) .map(r => mapper.writeValueAsString(r))(Encoders.STRING) - .write.mode(SaveMode.Overwrite).save(targetPath) + .write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(targetPath) } } From 174c3037e1ed5b4c6e7b23275b1dac97e93d7ffd Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 4 Jan 2022 14:40:16 +0100 Subject: [PATCH 126/176] OAF-store-graph mdstores: save them in text format --- .../scala/eu/dnetlib/dhp/collection/CollectionUtils.scala | 8 +++----- .../dhp/datacite/GenerateDataciteDatasetSpark.scala | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala index 509e464e5..e7de60e07 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala @@ -48,18 +48,16 @@ object CollectionUtils { List() } - def saveDataset(d: Dataset[Oaf], targetPath: String):Unit = { + def saveDataset(dataset: Dataset[Oaf], targetPath: String): Unit = { implicit val resultEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) - val mapper = new ObjectMapper - d + dataset .flatMap(i => CollectionUtils.fixRelations(i)) .filter(i => i != null) - .map(r => mapper.writeValueAsString(r))(Encoders.STRING) .write .mode(SaveMode.Overwrite) .option("compression", "gzip") - .text(targetPath) + .json(targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala index d11c33fb4..a205edcf2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala @@ -38,7 +38,7 @@ class GenerateDataciteDatasetSpark (propertyPath:String, args:Array[String], log val cleanedMdStoreVersion = mapper.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) val outputBasePath = cleanedMdStoreVersion.getHdfsPath log.info(s"outputBasePath is '$outputBasePath'") - val targetPath = s"$outputBasePath/$MDSTORE_DATA_PATH" + val targetPath = s"$outputBasePath$MDSTORE_DATA_PATH" log.info(s"targetPath is '$targetPath'") generateDataciteDataset(sourcePath, exportLinks, vocabularies, targetPath, spark) @@ -54,7 +54,7 @@ class GenerateDataciteDatasetSpark (propertyPath:String, args:Array[String], log * @param outputBasePath */ def reportTotalSize( targetPath: String, outputBasePath: String ):Unit = { - val total_items = spark.read.load(targetPath).count() + val total_items = spark.read.text(targetPath).count() writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$total_items", outputBasePath + MDSTORE_SIZE_PATH) } From 58f8998e3dd296ad01a7f3cd9fb913cb12acf145 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 4 Jan 2022 15:02:09 +0100 Subject: [PATCH 127/176] OAF-store-graph mdstores: save them in text format --- .../scala/eu/dnetlib/dhp/collection/CollectionUtils.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala index e7de60e07..86a28ac10 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala @@ -50,14 +50,16 @@ object CollectionUtils { def saveDataset(dataset: Dataset[Oaf], targetPath: String): Unit = { implicit val resultEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) + val mapper = new ObjectMapper dataset .flatMap(i => CollectionUtils.fixRelations(i)) .filter(i => i != null) + .map(r => mapper.writeValueAsString(r))(Encoders.STRING) .write .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(targetPath) + .text(targetPath) } } From 908294d86e03a199c2f3e9db98fcdb689c66db48 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 5 Jan 2022 15:49:05 +0100 Subject: [PATCH 128/176] OAF-store-graph mdstores: firther fix for PR#180 --- .../raw/CopyHdfsOafSparkApplication.scala | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala index 1376c6b35..fa13f477c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala @@ -48,26 +48,27 @@ object CopyHdfsOafSparkApplication { log.info("hdfsPath: {}", hdfsPath) implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - import spark.implicits._ val paths = DHPUtils.mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation, true).asScala val validPaths: List[String] = paths.filter(p => HdfsSupport.exists(p, sc.hadoopConfiguration)).toList + val types = ModelSupport.oafTypes.entrySet + .asScala + .map(e => Tuple2(e.getKey, e.getValue)) + if (validPaths.nonEmpty) { - val oaf = spark.read.load(validPaths: _*).as[String] + val oaf = spark.read.textFile(validPaths: _*) val mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) - val l = ModelSupport.oafTypes.entrySet.asScala.toList - l.foreach( - e => - oaf - .filter(o => isOafType(o, e.getKey)) - .map(j => mapper.readValue(j, e.getValue).asInstanceOf[Oaf]) - .map(s => mapper.writeValueAsString(s))(Encoders.STRING) - .write - .option("compression", "gzip") - .mode(SaveMode.Append) - .text(s"$hdfsPath/${e}") + + types.foreach(t => oaf + .filter(o => isOafType(o, t._1)) + .map(j => mapper.readValue(j, t._2).asInstanceOf[Oaf]) + .map(s => mapper.writeValueAsString(s))(Encoders.STRING) + .write + .option("compression", "gzip") + .mode(SaveMode.Append) + .text(s"$hdfsPath/${t._1}") ) } } From adccc2346a46283aedd9ce0a33e37b3236002be6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 7 Jan 2022 11:28:50 +0100 Subject: [PATCH 129/176] [SDG-FOS] to lower case for the doi --- .../actionmanager/createunresolvedentities/GetSDGSparkJob.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java index 328075389..e2c28e64e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java @@ -69,8 +69,6 @@ public class GetSDGSparkJob implements Serializable { .read() .format("csv") .option("sep", delimiter) - .option("inferSchema", "true") - .option("header", "true") .option("quotes", "\"") .load(sourcePath); From b7e450070b048dde950c68f54a23be66880297b6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 7 Jan 2022 12:13:26 +0100 Subject: [PATCH 130/176] [SDG-FOS] to import SDG file not considering the header --- .../actionmanager/createunresolvedentities/GetSDGSparkJob.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java index e2c28e64e..328075389 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java @@ -69,6 +69,8 @@ public class GetSDGSparkJob implements Serializable { .read() .format("csv") .option("sep", delimiter) + .option("inferSchema", "true") + .option("header", "true") .option("quotes", "\"") .load(sourcePath); From 064f9bbd87f71434c070c8a98589eaa6afe5f050 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 7 Jan 2022 18:58:51 +0100 Subject: [PATCH 131/176] [AFFPropSR] added new paprameter for the number of iterations and new code for just one iteration --- .../SparkResultToOrganizationFromSemRel.java | 69 ++++++++++++++----- .../input_propagation_parameter.json | 6 ++ .../oozie_app/workflow.xml | 3 +- 3 files changed, 59 insertions(+), 19 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java index 9ceebf222..cfc69a8f0 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java @@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.io.Serializable; import java.util.Arrays; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -64,6 +65,18 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { final String workingPath = parser.get("workingDir"); log.info("workingPath: {}", workingPath); + final int iterations = Optional + .ofNullable(parser.get("iterations")) + .map(v -> { + if (Integer.valueOf(v) < MAX_ITERATION) { + return Integer.valueOf(v); + } else + return MAX_ITERATION; + }) + .orElse(MAX_ITERATION); + + log.info("iterations: {}", iterations); + SparkConf conf = new SparkConf(); conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); @@ -77,7 +90,8 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { resultOrganizationPath, relationPath, workingPath, - outputPath)); + outputPath, + iterations)); } public static void execPropagation(SparkSession spark, @@ -86,26 +100,45 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { String resultOrganizationPath, String graphPath, String workingPath, + String outputPath, + int iterations) { + if (iterations == 1) { + doPropagateOnce( + spark, leavesPath, childParentPath, resultOrganizationPath, graphPath, + workingPath, outputPath); + } else { + + final LongAccumulator iterationOne = spark.sparkContext().longAccumulator(ITERATION_ONE); + final LongAccumulator iterationTwo = spark.sparkContext().longAccumulator(ITERATION_TWO); + final LongAccumulator iterationThree = spark.sparkContext().longAccumulator(ITERATION_THREE); + final LongAccumulator iterationFour = spark.sparkContext().longAccumulator(ITERATION_FOUR); + final LongAccumulator iterationFive = spark.sparkContext().longAccumulator(ITERATION_FIVE); + final LongAccumulator notReachedFirstParent = spark.sparkContext().longAccumulator(ITERATION_NO_PARENT); + + final PropagationCounter propagationCounter = new PropagationCounter(iterationOne, + iterationTwo, + iterationThree, + iterationFour, + iterationFive, + notReachedFirstParent); + + doPropagate( + spark, leavesPath, childParentPath, resultOrganizationPath, graphPath, + workingPath, outputPath, propagationCounter); + } + + } + + private static void doPropagateOnce(SparkSession spark, String leavesPath, String childParentPath, + String resultOrganizationPath, String graphPath, String workingPath, String outputPath) { - final LongAccumulator iterationOne = spark.sparkContext().longAccumulator(ITERATION_ONE); - final LongAccumulator iterationTwo = spark.sparkContext().longAccumulator(ITERATION_TWO); - final LongAccumulator iterationThree = spark.sparkContext().longAccumulator(ITERATION_THREE); - final LongAccumulator iterationFour = spark.sparkContext().longAccumulator(ITERATION_FOUR); - final LongAccumulator iterationFive = spark.sparkContext().longAccumulator(ITERATION_FIVE); - final LongAccumulator notReachedFirstParent = spark.sparkContext().longAccumulator(ITERATION_NO_PARENT); - - final PropagationCounter propagationCounter = new PropagationCounter(iterationOne, - iterationTwo, - iterationThree, - iterationFour, - iterationFive, - notReachedFirstParent); - - doPropagate( - spark, leavesPath, childParentPath, resultOrganizationPath, graphPath, - workingPath, outputPath, propagationCounter); + StepActions + .execStep( + spark, graphPath, workingPath + NEW_RELATION_PATH, + leavesPath, childParentPath, resultOrganizationPath); + addNewRelations(spark, workingPath + NEW_RELATION_PATH, outputPath); } private static void doPropagate(SparkSession spark, String leavesPath, String childParentPath, diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json index f73cc221e..e09cd62fa 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json @@ -46,5 +46,11 @@ "paramLongName": "outputPath", "paramDescription": "the path used to store temporary output files", "paramRequired": true + }, + { + "paramName": "it", + "paramLongName": "iterations", + "paramDescription": "the number of iterations to be computed", + "paramRequired": false } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml index 17502abea..3f0530aaf 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + sourcePath @@ -181,6 +181,7 @@ --resultOrgPath${workingDir}/preparedInfo/resultOrgPath --hive_metastore_uris${hive_metastore_uris} --workingDir${workingDir}/working + --iterations${iterations} From 0163dadb7f9abbe1c801b55d30728c4e3768627f Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 11 Jan 2022 11:05:44 +0100 Subject: [PATCH 132/176] [doiboost] - update MAG schema, new filed added on version dec-2021 --- .../eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala index d25a4893f..039c935f3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala @@ -41,7 +41,7 @@ object SparkImportMagIntoDataset { "PaperReferences" -> Tuple2("mag/PaperReferences.txt", Seq("PaperId:long", "PaperReferenceId:long")), "PaperResources" -> Tuple2("mag/PaperResources.txt", Seq("PaperId:long", "ResourceType:int", "ResourceUrl:string", "SourceUrl:string", "RelationshipType:int")), "PaperUrls" -> Tuple2("mag/PaperUrls.txt", Seq("PaperId:long", "SourceType:int?", "SourceUrl:string", "LanguageCode:string")), - "Papers" -> Tuple2("mag/Papers.txt", Seq("PaperId:long", "Rank:uint", "Doi:string", "DocType:string", "PaperTitle:string", "OriginalTitle:string", "BookTitle:string", "Year:int?", "Date:DateTime?", "OnlineDate:DateTime?", "Publisher:string", "JournalId:long?", "ConferenceSeriesId:long?", "ConferenceInstanceId:long?", "Volume:string", "Issue:string", "FirstPage:string", "LastPage:string", "ReferenceCount:long", "CitationCount:long", "EstimatedCitation:long", "OriginalVenue:string", "FamilyId:long?", "FamilyRank:uint?", "CreatedDate:DateTime")), + "Papers" -> Tuple2("mag/Papers.txt", Seq("PaperId:long", "Rank:uint", "Doi:string", "DocType:string", "PaperTitle:string", "OriginalTitle:string", "BookTitle:string", "Year:int?", "Date:DateTime?", "OnlineDate:DateTime?", "Publisher:string", "JournalId:long?", "ConferenceSeriesId:long?", "ConferenceInstanceId:long?", "Volume:string", "Issue:string", "FirstPage:string", "LastPage:string", "ReferenceCount:long", "CitationCount:long", "EstimatedCitation:long", "OriginalVenue:string", "FamilyId:long?", "FamilyRank:uint?", "DocSubTypes:string", "CreatedDate:DateTime")), "RelatedFieldOfStudy" -> Tuple2("advanced/RelatedFieldOfStudy.txt", Seq("FieldOfStudyId1:long", "Type1:string", "FieldOfStudyId2:long", "Type2:string", "Rank:float")) ) From 4f212652caaa96571c080877742cea3b0ec67c66 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 11 Jan 2022 16:57:48 +0100 Subject: [PATCH 133/176] scalafmt: code formatting --- .../src/main/resources/scalafmt/scalafmt.conf | 15 + .../application/SparkScalaApplication.scala | 73 +-- .../dhp/sx/graph/scholix/ScholixUtils.scala | 260 +++++---- .../dhp/collection/CollectionUtils.scala | 18 +- .../dhp/datacite/AbstractRestClient.scala | 16 +- .../dhp/datacite/DataciteAPIImporter.scala | 13 +- .../dhp/datacite/DataciteModelConstants.scala | 284 +++++++--- .../DataciteToOAFTransformation.scala | 411 +++++++++----- .../GenerateDataciteDatasetSpark.scala | 74 ++- .../dnetlib/dhp/datacite/ImportDatacite.scala | 100 ++-- .../SparkDownloadUpdateDatacite.scala | 24 +- .../eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala | 380 ++++++++++--- .../bio/SparkTransformBioDatabaseToOAF.scala | 29 +- .../ebi/SparkCreateBaselineDataFrame.scala | 116 ++-- .../sx/bio/ebi/SparkDownloadEBILinks.scala | 52 +- .../dhp/sx/bio/ebi/SparkEBILinksToOaf.scala | 26 +- .../dnetlib/dhp/sx/bio/pubmed/PMParser.scala | 107 ++-- .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 193 ++++--- .../sx/graph/SparkRetrieveDataciteDelta.scala | 358 ++++++------ .../dhp/datacite/DataciteToOAFTest.scala | 62 +- .../dnetlib/dhp/sx/bio/BioScholixTest.scala | 132 +++-- .../doiboost/DoiBoostMappingUtil.scala | 324 ++++++----- .../SparkGenerateDOIBoostActionSet.scala | 79 ++- .../doiboost/SparkGenerateDoiBoost.scala | 246 +++++--- .../doiboost/crossref/Crossref2Oaf.scala | 530 +++++++++++------- .../doiboost/crossref/CrossrefDataset.scala | 34 +- .../crossref/GenerateCrossrefDataset.scala | 23 +- .../crossref/SparkMapDumpIntoOAF.scala | 41 +- .../crossref/UnpackCrtossrefEntries.scala | 21 +- .../dnetlib/doiboost/mag/MagDataModel.scala | 269 ++++++--- .../mag/SparkImportMagIntoDataset.scala | 273 +++++++-- .../doiboost/mag/SparkProcessMAG.scala | 197 +++++-- .../dnetlib/doiboost/orcid/ORCIDToOAF.scala | 78 +-- .../orcid/SparkConvertORCIDToOAF.scala | 16 +- .../doiboost/orcid/SparkPreprocessORCID.scala | 51 +- .../doiboost/uw/SparkMapUnpayWallToOAF.scala | 17 +- .../dnetlib/doiboost/uw/UnpayWallToOAF.scala | 58 +- .../doiboost/DoiBoostHostedByMapTest.scala | 8 +- .../dhp/doiboost/NormalizeDoiTest.scala | 16 +- .../crossref/CrossrefMappingTest.scala | 449 ++++++++++----- .../dhp/doiboost/mag/MAGMappingTest.scala | 44 +- .../orcid/MappingORCIDToOAFTest.scala | 41 +- .../doiboost/uw/UnpayWallMappingTest.scala | 32 +- .../oa/graph/hostedbymap/Aggregators.scala | 229 +++++--- .../SparkApplyHostedByMapToDatasource.scala | 33 +- .../SparkApplyHostedByMapToResult.scala | 42 +- .../SparkPrepareHostedByInfoToApply.scala | 61 +- .../hostedbymap/SparkProduceHostedByMap.scala | 130 +++-- .../raw/CopyHdfsOafSparkApplication.scala | 26 +- .../resolution/SparkResolveEntities.scala | 101 ++-- .../resolution/SparkResolveRelation.scala | 83 +-- .../sx/graphimport/SparkDataciteToOAF.scala | 12 +- .../graph/SparkConvertDatasetToJsonRDD.scala | 18 +- .../sx/graph/SparkConvertObjectToJson.scala | 25 +- .../sx/graph/SparkConvertRDDtoDataset.scala | 100 +++- .../dhp/sx/graph/SparkCreateInputGraph.scala | 97 +++- .../dhp/sx/graph/SparkCreateScholix.scala | 111 ++-- .../sx/graph/SparkCreateSummaryObject.scala | 22 +- .../dhp/sx/graph/pangaea/PangaeaUtils.scala | 101 ++-- .../SparkGeneratePanagaeaDataset.scala | 25 +- .../dhp/oa/graph/hostedbymap/TestApply.scala | 142 +++-- .../oa/graph/hostedbymap/TestPrepare.scala | 69 ++- .../oa/graph/hostedbymap/TestPreprocess.scala | 115 ++-- .../resolution/ResolveEntitiesTest.scala | 293 ++++++---- .../sx/graph/RetrieveDataciteDeltaTest.scala | 8 +- .../sx/graph/scholix/ScholixGraphTest.scala | 71 ++- pom.xml | 39 ++ 67 files changed, 4905 insertions(+), 2638 deletions(-) create mode 100644 dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf diff --git a/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf b/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf new file mode 100644 index 000000000..00c866f28 --- /dev/null +++ b/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf @@ -0,0 +1,15 @@ +style = defaultWithAlign + +align.openParenCallSite = false +align.openParenDefnSite = false +align.tokens = [{code = "->"}, {code = "<-"}, {code = "=>", owner = "Case"}] +continuationIndent.callSite = 2 +continuationIndent.defnSite = 2 +danglingParentheses = true +indentOperator = spray +maxColumn = 100 +newlines.alwaysBeforeTopLevelStatements = true +#project.excludeFilters = ["\.*\.sbt"] +rewrite.rules = [RedundantParens, SortImports] +spaces.inImportCurlyBraces = false +unindentTopLevelOperators = true \ No newline at end of file diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala index 6541746b2..f8afe9af4 100644 --- a/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala @@ -2,71 +2,72 @@ package eu.dnetlib.dhp.application import scala.io.Source -/** - * This is the main Interface SparkApplication - * where all the Spark Scala class should inherit - * - */ +/** This is the main Interface SparkApplication + * where all the Spark Scala class should inherit + */ trait SparkScalaApplication { - /** - * This is the path in the classpath of the json - * describes all the argument needed to run - */ + + /** This is the path in the classpath of the json + * describes all the argument needed to run + */ val propertyPath: String - /** - * Utility to parse the arguments using the - * property json in the classpath identified from - * the variable propertyPath - * - * @param args the list of arguments - */ + /** Utility to parse the arguments using the + * property json in the classpath identified from + * the variable propertyPath + * + * @param args the list of arguments + */ def parseArguments(args: Array[String]): ArgumentApplicationParser = { - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream(propertyPath)).mkString) + val parser = new ArgumentApplicationParser( + Source.fromInputStream(getClass.getResourceAsStream(propertyPath)).mkString + ) parser.parseArgument(args) parser } - /** - * Here all the spark applications runs this method - * where the whole logic of the spark node is defined - */ + /** Here all the spark applications runs this method + * where the whole logic of the spark node is defined + */ def run(): Unit } - import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.slf4j.Logger -abstract class AbstractScalaApplication (val propertyPath:String, val args:Array[String], log:Logger) extends SparkScalaApplication { +abstract class AbstractScalaApplication( + val propertyPath: String, + val args: Array[String], + log: Logger +) extends SparkScalaApplication { var parser: ArgumentApplicationParser = null - var spark:SparkSession = null + var spark: SparkSession = null - - def initialize():SparkScalaApplication = { + def initialize(): SparkScalaApplication = { parser = parseArguments(args) spark = createSparkSession() this } - /** - * Utility for creating a spark session starting from parser - * - * @return a spark Session - */ - private def createSparkSession():SparkSession = { - require(parser!= null) + /** Utility for creating a spark session starting from parser + * + * @return a spark Session + */ + private def createSparkSession(): SparkSession = { + require(parser != null) - val conf:SparkConf = new SparkConf() + val conf: SparkConf = new SparkConf() val master = parser.get("master") log.info(s"Creating Spark session: Master: $master") - SparkSession.builder().config(conf) + SparkSession + .builder() + .config(conf) .appName(getClass.getSimpleName) .master(master) .getOrCreate() } -} \ No newline at end of file +} diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala index f35af0905..a995016a8 100644 --- a/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala @@ -14,7 +14,6 @@ import scala.io.Source object ScholixUtils extends Serializable { - val DNET_IDENTIFIER_SCHEMA: String = "DNET Identifier" val DATE_RELATION_KEY: String = "RelationDate" @@ -24,7 +23,11 @@ object ScholixUtils extends Serializable { case class RelatedEntities(id: String, relatedDataset: Long, relatedPublication: Long) {} val relations: Map[String, RelationVocabulary] = { - val input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/scholexplorer/relation/relations.json")).mkString + val input = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/scholexplorer/relation/relations.json") + ) + .mkString implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) @@ -32,13 +35,14 @@ object ScholixUtils extends Serializable { json.extract[Map[String, RelationVocabulary]] } - def extractRelationDate(relation: Relation): String = { if (relation.getProperties == null || !relation.getProperties.isEmpty) null else { - val date = relation.getProperties.asScala.find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey)).map(p => p.getValue) + val date = relation.getProperties.asScala + .find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey)) + .map(p => p.getValue) if (date.isDefined) date.get else @@ -58,78 +62,80 @@ object ScholixUtils extends Serializable { def inverseRelationShip(rel: ScholixRelationship): ScholixRelationship = { new ScholixRelationship(rel.getInverse, rel.getSchema, rel.getName) - } - - def generateScholixResourceFromResult(r:Result) :ScholixResource = { + def generateScholixResourceFromResult(r: Result): ScholixResource = { generateScholixResourceFromSummary(ScholixUtils.resultToSummary(r)) } + val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] = + new Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] with Serializable { + override def zero: RelatedEntities = null - val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] = new Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] with Serializable { - override def zero: RelatedEntities = null + override def reduce(b: RelatedEntities, a: (String, String, Long)): RelatedEntities = { + val relatedDataset = if ("dataset".equalsIgnoreCase(a._2)) a._3 else 0 + val relatedPublication = if ("publication".equalsIgnoreCase(a._2)) a._3 else 0 - override def reduce(b: RelatedEntities, a: (String, String, Long)): RelatedEntities = { - val relatedDataset = if ("dataset".equalsIgnoreCase(a._2)) a._3 else 0 - val relatedPublication = if ("publication".equalsIgnoreCase(a._2)) a._3 else 0 - - if (b == null) - RelatedEntities(a._1, relatedDataset, relatedPublication) - else - RelatedEntities(a._1, b.relatedDataset + relatedDataset, b.relatedPublication + relatedPublication) - } - - override def merge(b1: RelatedEntities, b2: RelatedEntities): RelatedEntities = { - if (b1 != null && b2 != null) - RelatedEntities(b1.id, b1.relatedDataset + b2.relatedDataset, b1.relatedPublication + b2.relatedPublication) - - else if (b1 != null) - b1 - else - b2 - } - - override def finish(reduction: RelatedEntities): RelatedEntities = reduction - - override def bufferEncoder: Encoder[RelatedEntities] = Encoders.bean(classOf[RelatedEntities]) - - override def outputEncoder: Encoder[RelatedEntities] = Encoders.bean(classOf[RelatedEntities]) - } - - - val scholixAggregator: Aggregator[(String, Scholix), Scholix, Scholix] = new Aggregator[(String, Scholix), Scholix, Scholix] with Serializable { - override def zero: Scholix = null - - - def scholix_complete(s: Scholix): Boolean = { - if (s == null || s.getIdentifier == null) { - false - } else if (s.getSource == null || s.getTarget == null) { - false + if (b == null) + RelatedEntities(a._1, relatedDataset, relatedPublication) + else + RelatedEntities( + a._1, + b.relatedDataset + relatedDataset, + b.relatedPublication + relatedPublication + ) } - else if (s.getLinkprovider == null || s.getLinkprovider.isEmpty) - false - else - true + + override def merge(b1: RelatedEntities, b2: RelatedEntities): RelatedEntities = { + if (b1 != null && b2 != null) + RelatedEntities( + b1.id, + b1.relatedDataset + b2.relatedDataset, + b1.relatedPublication + b2.relatedPublication + ) + else if (b1 != null) + b1 + else + b2 + } + + override def finish(reduction: RelatedEntities): RelatedEntities = reduction + + override def bufferEncoder: Encoder[RelatedEntities] = Encoders.bean(classOf[RelatedEntities]) + + override def outputEncoder: Encoder[RelatedEntities] = Encoders.bean(classOf[RelatedEntities]) } - override def reduce(b: Scholix, a: (String, Scholix)): Scholix = { - if (scholix_complete(b)) b else a._2 + val scholixAggregator: Aggregator[(String, Scholix), Scholix, Scholix] = + new Aggregator[(String, Scholix), Scholix, Scholix] with Serializable { + override def zero: Scholix = null + + def scholix_complete(s: Scholix): Boolean = { + if (s == null || s.getIdentifier == null) { + false + } else if (s.getSource == null || s.getTarget == null) { + false + } else if (s.getLinkprovider == null || s.getLinkprovider.isEmpty) + false + else + true + } + + override def reduce(b: Scholix, a: (String, Scholix)): Scholix = { + if (scholix_complete(b)) b else a._2 + } + + override def merge(b1: Scholix, b2: Scholix): Scholix = { + if (scholix_complete(b1)) b1 else b2 + } + + override def finish(reduction: Scholix): Scholix = reduction + + override def bufferEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] + + override def outputEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] } - override def merge(b1: Scholix, b2: Scholix): Scholix = { - if (scholix_complete(b1)) b1 else b2 - } - - override def finish(reduction: Scholix): Scholix = reduction - - override def bufferEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] - - override def outputEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] - } - - def createInverseScholixRelation(scholix: Scholix): Scholix = { val s = new Scholix s.setPublicationDate(scholix.getPublicationDate) @@ -138,16 +144,19 @@ object ScholixUtils extends Serializable { s.setRelationship(inverseRelationShip(scholix.getRelationship)) s.setSource(scholix.getTarget) s.setTarget(scholix.getSource) - s.setIdentifier(DHPUtils.md5(s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}")) + s.setIdentifier( + DHPUtils.md5( + s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}" + ) + ) s - } def extractCollectedFrom(summary: ScholixResource): List[ScholixEntityId] = { if (summary.getCollectedFrom != null && !summary.getCollectedFrom.isEmpty) { - val l: List[ScholixEntityId] = summary.getCollectedFrom.asScala.map { - d => new ScholixEntityId(d.getProvider.getName, d.getProvider.getIdentifiers) + val l: List[ScholixEntityId] = summary.getCollectedFrom.asScala.map { d => + new ScholixEntityId(d.getProvider.getName, d.getProvider.getIdentifiers) }(collection.breakOut) l } else List() @@ -155,8 +164,11 @@ object ScholixUtils extends Serializable { def extractCollectedFrom(summary: ScholixSummary): List[ScholixEntityId] = { if (summary.getDatasources != null && !summary.getDatasources.isEmpty) { - val l: List[ScholixEntityId] = summary.getDatasources.asScala.map { - d => new ScholixEntityId(d.getDatasourceName, List(new ScholixIdentifier(d.getDatasourceId, "DNET Identifier", null)).asJava) + val l: List[ScholixEntityId] = summary.getDatasources.asScala.map { d => + new ScholixEntityId( + d.getDatasourceName, + List(new ScholixIdentifier(d.getDatasourceId, "DNET Identifier", null)).asJava + ) }(collection.breakOut) l } else List() @@ -165,17 +177,16 @@ object ScholixUtils extends Serializable { def extractCollectedFrom(relation: Relation): List[ScholixEntityId] = { if (relation.getCollectedfrom != null && !relation.getCollectedfrom.isEmpty) { - - val l: List[ScholixEntityId] = relation.getCollectedfrom.asScala.map { - c => - - new ScholixEntityId(c.getValue, List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA, null)).asJava) + val l: List[ScholixEntityId] = relation.getCollectedfrom.asScala.map { c => + new ScholixEntityId( + c.getValue, + List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA, null)).asJava + ) }.toList l } else List() } - def generateCompleteScholix(scholix: Scholix, target: ScholixSummary): Scholix = { val s = new Scholix s.setPublicationDate(scholix.getPublicationDate) @@ -184,11 +195,14 @@ object ScholixUtils extends Serializable { s.setRelationship(scholix.getRelationship) s.setSource(scholix.getSource) s.setTarget(generateScholixResourceFromSummary(target)) - s.setIdentifier(DHPUtils.md5(s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}")) + s.setIdentifier( + DHPUtils.md5( + s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}" + ) + ) s } - def generateCompleteScholix(scholix: Scholix, target: ScholixResource): Scholix = { val s = new Scholix s.setPublicationDate(scholix.getPublicationDate) @@ -197,11 +211,14 @@ object ScholixUtils extends Serializable { s.setRelationship(scholix.getRelationship) s.setSource(scholix.getSource) s.setTarget(target) - s.setIdentifier(DHPUtils.md5(s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}")) + s.setIdentifier( + DHPUtils.md5( + s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}" + ) + ) s } - def generateScholixResourceFromSummary(summaryObject: ScholixSummary): ScholixResource = { val r = new ScholixResource r.setIdentifier(summaryObject.getLocalIdentifier) @@ -214,7 +231,8 @@ object ScholixUtils extends Serializable { r.setTitle(summaryObject.getTitle.get(0)) if (summaryObject.getAuthor != null && !summaryObject.getAuthor.isEmpty) { - val l: List[ScholixEntityId] = summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a, null)).toList + val l: List[ScholixEntityId] = + summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a, null)).toList if (l.nonEmpty) r.setCreator(l.asJava) } @@ -222,20 +240,27 @@ object ScholixUtils extends Serializable { if (summaryObject.getDate != null && !summaryObject.getDate.isEmpty) r.setPublicationDate(summaryObject.getDate.get(0)) if (summaryObject.getPublisher != null && !summaryObject.getPublisher.isEmpty) { - val plist: List[ScholixEntityId] = summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList + val plist: List[ScholixEntityId] = + summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList if (plist.nonEmpty) r.setPublisher(plist.asJava) } - if (summaryObject.getDatasources != null && !summaryObject.getDatasources.isEmpty) { - val l: List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala.map(c => new ScholixCollectedFrom( - new ScholixEntityId(c.getDatasourceName, List(new ScholixIdentifier(c.getDatasourceId, DNET_IDENTIFIER_SCHEMA, null)).asJava) - , "collected", "complete" - - )).toList + val l: List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala + .map(c => + new ScholixCollectedFrom( + new ScholixEntityId( + c.getDatasourceName, + List(new ScholixIdentifier(c.getDatasourceId, DNET_IDENTIFIER_SCHEMA, null)).asJava + ), + "collected", + "complete" + ) + ) + .toList if (l.nonEmpty) r.setCollectedFrom(l.asJava) @@ -244,9 +269,7 @@ object ScholixUtils extends Serializable { r } - - - def scholixFromSource(relation: Relation, source: ScholixResource):Scholix = { + def scholixFromSource(relation: Relation, source: ScholixResource): Scholix = { if (relation == null || source == null) return null val s = new Scholix @@ -262,7 +285,6 @@ object ScholixUtils extends Serializable { s.setPublicationDate(d) - if (source.getPublisher != null && !source.getPublisher.isEmpty) { s.setPublisher(source.getPublisher) } @@ -270,13 +292,14 @@ object ScholixUtils extends Serializable { val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null) if (semanticRelation == null) return null - s.setRelationship(new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse)) + s.setRelationship( + new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse) + ) s.setSource(source) s } - def scholixFromSource(relation: Relation, source: ScholixSummary): Scholix = { if (relation == null || source == null) @@ -298,12 +321,10 @@ object ScholixUtils extends Serializable { s.setPublicationDate(d) - if (source.getPublisher != null && !source.getPublisher.isEmpty) { val l: List[ScholixEntityId] = source.getPublisher.asScala - .map { - p => - new ScholixEntityId(p, null) + .map { p => + new ScholixEntityId(p, null) }(collection.breakOut) if (l.nonEmpty) @@ -313,31 +334,37 @@ object ScholixUtils extends Serializable { val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null) if (semanticRelation == null) return null - s.setRelationship(new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse)) + s.setRelationship( + new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse) + ) s.setSource(generateScholixResourceFromSummary(source)) s } + def findURLForPID( + pidValue: List[StructuredProperty], + urls: List[String] + ): List[(StructuredProperty, String)] = { + pidValue.map { p => + val pv = p.getValue - def findURLForPID(pidValue: List[StructuredProperty], urls: List[String]): List[(StructuredProperty, String)] = { - pidValue.map { - p => - val pv = p.getValue - - val r = urls.find(u => u.toLowerCase.contains(pv.toLowerCase)) - (p, r.orNull) + val r = urls.find(u => u.toLowerCase.contains(pv.toLowerCase)) + (p, r.orNull) } } - def extractTypedIdentifierFromInstance(r: Result): List[ScholixIdentifier] = { if (r.getInstance() == null || r.getInstance().isEmpty) return List() - r.getInstance().asScala.filter(i => i.getUrl != null && !i.getUrl.isEmpty) + r.getInstance() + .asScala + .filter(i => i.getUrl != null && !i.getUrl.isEmpty) .filter(i => i.getPid != null && i.getUrl != null) .flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList)) - .map(i => new ScholixIdentifier(i._1.getValue, i._1.getQualifier.getClassid, i._2)).distinct.toList + .map(i => new ScholixIdentifier(i._1.getValue, i._1.getQualifier.getClassid, i._2)) + .distinct + .toList } def resultToSummary(r: Result): ScholixSummary = { @@ -371,7 +398,12 @@ object ScholixUtils extends Serializable { s.setAuthor(authors.asJava) } if (r.getInstance() != null) { - val dt: List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue).toList + val dt: List[String] = r + .getInstance() + .asScala + .filter(i => i.getDateofacceptance != null) + .map(i => i.getDateofacceptance.getValue) + .toList if (dt.nonEmpty) s.setDate(dt.distinct.asJava) } @@ -382,7 +414,9 @@ object ScholixUtils extends Serializable { } if (r.getSubject != null && !r.getSubject.isEmpty) { - val subjects: List[SchemeValue] = r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue)).toList + val subjects: List[SchemeValue] = r.getSubject.asScala + .map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue)) + .toList if (subjects.nonEmpty) s.setSubject(subjects.asJava) } @@ -391,7 +425,9 @@ object ScholixUtils extends Serializable { s.setPublisher(List(r.getPublisher.getValue).asJava) if (r.getCollectedfrom != null && !r.getCollectedfrom.isEmpty) { - val cf: List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete")).toList + val cf: List[CollectedFromType] = r.getCollectedfrom.asScala + .map(c => new CollectedFromType(c.getValue, c.getKey, "complete")) + .toList if (cf.nonEmpty) s.setDatasources(cf.distinct.asJava) } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala index 86a28ac10..85f5a3082 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala @@ -7,16 +7,14 @@ import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode} object CollectionUtils { - /** - * This method in pipeline to the transformation phase, - * generates relations in both verse, typically it should be a phase of flatMap - * - * @param i input OAF - * @return - * If the input OAF is an entity -> List(i) - * If the input OAF is a relation -> List(relation, inverseRelation) - * - */ + /** This method in pipeline to the transformation phase, + * generates relations in both verse, typically it should be a phase of flatMap + * + * @param i input OAF + * @return + * If the input OAF is an entity -> List(i) + * If the input OAF is a relation -> List(relation, inverseRelation) + */ def fixRelations(i: Oaf): List[Oaf] = { if (i.isInstanceOf[OafEntity]) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala index 6a9b8e3e5..471149b25 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala @@ -6,7 +6,6 @@ import org.apache.http.client.methods.{HttpGet, HttpPost, HttpUriRequest} import org.apache.http.entity.StringEntity import org.apache.http.impl.client.HttpClientBuilder - abstract class AbstractRestClient extends Iterator[String] { var buffer: List[String] = List() @@ -16,12 +15,10 @@ abstract class AbstractRestClient extends Iterator[String] { var complete: Boolean = false - def extractInfo(input: String): Unit protected def getBufferData(): Unit - def doHTTPGETRequest(url: String): String = { val httpGet = new HttpGet(url) doHTTPRequest(httpGet) @@ -43,7 +40,6 @@ abstract class AbstractRestClient extends Iterator[String] { buffer.nonEmpty && current_index < buffer.size } - override def next(): String = { val next_item: String = buffer(current_index) current_index = current_index + 1 @@ -52,13 +48,14 @@ abstract class AbstractRestClient extends Iterator[String] { next_item } - private def doHTTPRequest[A <: HttpUriRequest](r: A): String = { val timeout = 60; // seconds - val config = RequestConfig.custom() + val config = RequestConfig + .custom() .setConnectTimeout(timeout * 1000) .setConnectionRequestTimeout(timeout * 1000) - .setSocketTimeout(timeout * 1000).build() + .setSocketTimeout(timeout * 1000) + .build() val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() try { var tries = 4 @@ -69,8 +66,7 @@ abstract class AbstractRestClient extends Iterator[String] { println(s"get response with status${response.getStatusLine.getStatusCode}") if (response.getStatusLine.getStatusCode > 400) { tries -= 1 - } - else + } else return IOUtils.toString(response.getEntity.getContent) } catch { case e: Throwable => @@ -87,4 +83,4 @@ abstract class AbstractRestClient extends Iterator[String] { } getBufferData() -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala index 7ec44a6ff..55823abd9 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala @@ -3,7 +3,8 @@ package eu.dnetlib.dhp.datacite import org.json4s.jackson.JsonMethods.{compact, parse, render} import org.json4s.{DefaultFormats, JValue} -class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until:Long = -1) extends AbstractRestClient { +class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until: Long = -1) + extends AbstractRestClient { override def extractInfo(input: String): Unit = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -16,16 +17,18 @@ class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until:Long = - current_index = 0 } - def get_url():String ={ - val to = if (until> 0) s"$until" else "*" + def get_url(): String = { + val to = if (until > 0) s"$until" else "*" s"https://api.datacite.org/dois?page[cursor]=1&page[size]=$blocks&query=updated:[$timestamp%20TO%20$to]" } override def getBufferData(): Unit = { if (!complete) { - val response = if (scroll_value.isDefined) doHTTPGETRequest(scroll_value.get) else doHTTPGETRequest(get_url()) + val response = + if (scroll_value.isDefined) doHTTPGETRequest(scroll_value.get) + else doHTTPGETRequest(get_url()) extractInfo(response) } } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala index 6c5dc8cce..a59779387 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala @@ -10,24 +10,38 @@ import java.util.Locale import java.util.regex.Pattern import scala.io.Source -/** - * This class represent the dataModel of the input Dataset of Datacite - * @param doi THE DOI - * @param timestamp timestamp of last update date - * @param isActive the record is active or deleted - * @param json the json native records - */ +/** This class represent the dataModel of the input Dataset of Datacite + * @param doi THE DOI + * @param timestamp timestamp of last update date + * @param isActive the record is active or deleted + * @param json the json native records + */ case class DataciteType(doi: String, timestamp: Long, isActive: Boolean, json: String) {} /* The following class are utility class used for the mapping from json datacite to OAF Shema */ -case class RelatedIdentifierType(relationType: String, relatedIdentifier: String, relatedIdentifierType: String) {} +case class RelatedIdentifierType( + relationType: String, + relatedIdentifier: String, + relatedIdentifierType: String +) {} -case class NameIdentifiersType(nameIdentifierScheme: Option[String], schemeUri: Option[String], nameIdentifier: Option[String]) {} +case class NameIdentifiersType( + nameIdentifierScheme: Option[String], + schemeUri: Option[String], + nameIdentifier: Option[String] +) {} -case class CreatorType(nameType: Option[String], nameIdentifiers: Option[List[NameIdentifiersType]], name: Option[String], familyName: Option[String], givenName: Option[String], affiliation: Option[List[String]]) {} +case class CreatorType( + nameType: Option[String], + nameIdentifiers: Option[List[NameIdentifiersType]], + name: Option[String], + familyName: Option[String], + givenName: Option[String], + affiliation: Option[List[String]] +) {} case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {} @@ -35,100 +49,230 @@ case class SubjectType(subject: Option[String], subjectScheme: Option[String]) { case class DescriptionType(descriptionType: Option[String], description: Option[String]) {} -case class FundingReferenceType(funderIdentifierType: Option[String], awardTitle: Option[String], awardUri: Option[String], funderName: Option[String], funderIdentifier: Option[String], awardNumber: Option[String]) {} +case class FundingReferenceType( + funderIdentifierType: Option[String], + awardTitle: Option[String], + awardUri: Option[String], + funderName: Option[String], + funderIdentifier: Option[String], + awardNumber: Option[String] +) {} case class DateType(date: Option[String], dateType: Option[String]) {} -case class OAFRelations(relation:String, inverse:String, relType:String) +case class OAFRelations(relation: String, inverse: String, relType: String) - -class DataciteModelConstants extends Serializable { - -} +class DataciteModelConstants extends Serializable {} object DataciteModelConstants { - val REL_TYPE_VALUE:String = "resultResult" + val REL_TYPE_VALUE: String = "resultResult" val DATE_RELATION_KEY = "RelationDate" val DATACITE_FILTER_PATH = "/eu/dnetlib/dhp/datacite/datacite_filter" val DOI_CLASS = "doi" val SUBJ_CLASS = "keywords" val DATACITE_NAME = "Datacite" val dataInfo: DataInfo = dataciteDataInfo("0.9") - val DATACITE_COLLECTED_FROM: KeyValue = OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, DATACITE_NAME) - val subRelTypeMapping: Map[String,OAFRelations] = Map( - ModelConstants.REFERENCES -> OAFRelations(ModelConstants.REFERENCES, ModelConstants.IS_REFERENCED_BY, ModelConstants.RELATIONSHIP), - ModelConstants.IS_REFERENCED_BY -> OAFRelations(ModelConstants.IS_REFERENCED_BY,ModelConstants.REFERENCES, ModelConstants.RELATIONSHIP), + val DATACITE_COLLECTED_FROM: KeyValue = + OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, DATACITE_NAME) - ModelConstants.IS_SUPPLEMENTED_BY -> OAFRelations(ModelConstants.IS_SUPPLEMENTED_BY,ModelConstants.IS_SUPPLEMENT_TO,ModelConstants.SUPPLEMENT), - ModelConstants.IS_SUPPLEMENT_TO -> OAFRelations(ModelConstants.IS_SUPPLEMENT_TO,ModelConstants.IS_SUPPLEMENTED_BY,ModelConstants.SUPPLEMENT), - - ModelConstants.HAS_PART -> OAFRelations(ModelConstants.HAS_PART,ModelConstants.IS_PART_OF, ModelConstants.PART), - ModelConstants.IS_PART_OF -> OAFRelations(ModelConstants.IS_PART_OF,ModelConstants.HAS_PART, ModelConstants.PART), - - ModelConstants.IS_VERSION_OF-> OAFRelations(ModelConstants.IS_VERSION_OF,ModelConstants.HAS_VERSION,ModelConstants.VERSION), - ModelConstants.HAS_VERSION-> OAFRelations(ModelConstants.HAS_VERSION,ModelConstants.IS_VERSION_OF,ModelConstants.VERSION), - - ModelConstants.IS_IDENTICAL_TO -> OAFRelations(ModelConstants.IS_IDENTICAL_TO,ModelConstants.IS_IDENTICAL_TO, ModelConstants.RELATIONSHIP), - - ModelConstants.IS_CONTINUED_BY -> OAFRelations(ModelConstants.IS_CONTINUED_BY,ModelConstants.CONTINUES, ModelConstants.RELATIONSHIP), - ModelConstants.CONTINUES -> OAFRelations(ModelConstants.CONTINUES,ModelConstants.IS_CONTINUED_BY, ModelConstants.RELATIONSHIP), - - ModelConstants.IS_NEW_VERSION_OF-> OAFRelations(ModelConstants.IS_NEW_VERSION_OF,ModelConstants.IS_PREVIOUS_VERSION_OF, ModelConstants.VERSION), - ModelConstants.IS_PREVIOUS_VERSION_OF ->OAFRelations(ModelConstants.IS_PREVIOUS_VERSION_OF,ModelConstants.IS_NEW_VERSION_OF, ModelConstants.VERSION), - - ModelConstants.IS_DOCUMENTED_BY -> OAFRelations(ModelConstants.IS_DOCUMENTED_BY,ModelConstants.DOCUMENTS, ModelConstants.RELATIONSHIP), - ModelConstants.DOCUMENTS -> OAFRelations(ModelConstants.DOCUMENTS,ModelConstants.IS_DOCUMENTED_BY, ModelConstants.RELATIONSHIP), - - ModelConstants.IS_SOURCE_OF -> OAFRelations(ModelConstants.IS_SOURCE_OF,ModelConstants.IS_DERIVED_FROM, ModelConstants.VERSION), - ModelConstants.IS_DERIVED_FROM -> OAFRelations(ModelConstants.IS_DERIVED_FROM,ModelConstants.IS_SOURCE_OF, ModelConstants.VERSION), - - ModelConstants.CITES -> OAFRelations(ModelConstants.CITES,ModelConstants.IS_CITED_BY, ModelConstants.CITATION), - ModelConstants.IS_CITED_BY -> OAFRelations(ModelConstants.IS_CITED_BY,ModelConstants.CITES, ModelConstants.CITATION), - - ModelConstants.IS_VARIANT_FORM_OF -> OAFRelations(ModelConstants.IS_VARIANT_FORM_OF,ModelConstants.IS_DERIVED_FROM, ModelConstants.VERSION), - ModelConstants.IS_OBSOLETED_BY -> OAFRelations(ModelConstants.IS_OBSOLETED_BY,ModelConstants.IS_NEW_VERSION_OF, ModelConstants.VERSION), - - ModelConstants.REVIEWS -> OAFRelations(ModelConstants.REVIEWS,ModelConstants.IS_REVIEWED_BY, ModelConstants.REVIEW), - ModelConstants.IS_REVIEWED_BY -> OAFRelations(ModelConstants.IS_REVIEWED_BY,ModelConstants.REVIEWS, ModelConstants.REVIEW), - - ModelConstants.DOCUMENTS -> OAFRelations(ModelConstants.DOCUMENTS,ModelConstants.IS_DOCUMENTED_BY, ModelConstants.RELATIONSHIP), - ModelConstants.IS_DOCUMENTED_BY -> OAFRelations(ModelConstants.IS_DOCUMENTED_BY,ModelConstants.DOCUMENTS, ModelConstants.RELATIONSHIP), - - ModelConstants.COMPILES -> OAFRelations(ModelConstants.COMPILES,ModelConstants.IS_COMPILED_BY, ModelConstants.RELATIONSHIP), - ModelConstants.IS_COMPILED_BY -> OAFRelations(ModelConstants.IS_COMPILED_BY,ModelConstants.COMPILES, ModelConstants.RELATIONSHIP) + val subRelTypeMapping: Map[String, OAFRelations] = Map( + ModelConstants.REFERENCES -> OAFRelations( + ModelConstants.REFERENCES, + ModelConstants.IS_REFERENCED_BY, + ModelConstants.RELATIONSHIP + ), + ModelConstants.IS_REFERENCED_BY -> OAFRelations( + ModelConstants.IS_REFERENCED_BY, + ModelConstants.REFERENCES, + ModelConstants.RELATIONSHIP + ), + ModelConstants.IS_SUPPLEMENTED_BY -> OAFRelations( + ModelConstants.IS_SUPPLEMENTED_BY, + ModelConstants.IS_SUPPLEMENT_TO, + ModelConstants.SUPPLEMENT + ), + ModelConstants.IS_SUPPLEMENT_TO -> OAFRelations( + ModelConstants.IS_SUPPLEMENT_TO, + ModelConstants.IS_SUPPLEMENTED_BY, + ModelConstants.SUPPLEMENT + ), + ModelConstants.HAS_PART -> OAFRelations( + ModelConstants.HAS_PART, + ModelConstants.IS_PART_OF, + ModelConstants.PART + ), + ModelConstants.IS_PART_OF -> OAFRelations( + ModelConstants.IS_PART_OF, + ModelConstants.HAS_PART, + ModelConstants.PART + ), + ModelConstants.IS_VERSION_OF -> OAFRelations( + ModelConstants.IS_VERSION_OF, + ModelConstants.HAS_VERSION, + ModelConstants.VERSION + ), + ModelConstants.HAS_VERSION -> OAFRelations( + ModelConstants.HAS_VERSION, + ModelConstants.IS_VERSION_OF, + ModelConstants.VERSION + ), + ModelConstants.IS_IDENTICAL_TO -> OAFRelations( + ModelConstants.IS_IDENTICAL_TO, + ModelConstants.IS_IDENTICAL_TO, + ModelConstants.RELATIONSHIP + ), + ModelConstants.IS_CONTINUED_BY -> OAFRelations( + ModelConstants.IS_CONTINUED_BY, + ModelConstants.CONTINUES, + ModelConstants.RELATIONSHIP + ), + ModelConstants.CONTINUES -> OAFRelations( + ModelConstants.CONTINUES, + ModelConstants.IS_CONTINUED_BY, + ModelConstants.RELATIONSHIP + ), + ModelConstants.IS_NEW_VERSION_OF -> OAFRelations( + ModelConstants.IS_NEW_VERSION_OF, + ModelConstants.IS_PREVIOUS_VERSION_OF, + ModelConstants.VERSION + ), + ModelConstants.IS_PREVIOUS_VERSION_OF -> OAFRelations( + ModelConstants.IS_PREVIOUS_VERSION_OF, + ModelConstants.IS_NEW_VERSION_OF, + ModelConstants.VERSION + ), + ModelConstants.IS_DOCUMENTED_BY -> OAFRelations( + ModelConstants.IS_DOCUMENTED_BY, + ModelConstants.DOCUMENTS, + ModelConstants.RELATIONSHIP + ), + ModelConstants.DOCUMENTS -> OAFRelations( + ModelConstants.DOCUMENTS, + ModelConstants.IS_DOCUMENTED_BY, + ModelConstants.RELATIONSHIP + ), + ModelConstants.IS_SOURCE_OF -> OAFRelations( + ModelConstants.IS_SOURCE_OF, + ModelConstants.IS_DERIVED_FROM, + ModelConstants.VERSION + ), + ModelConstants.IS_DERIVED_FROM -> OAFRelations( + ModelConstants.IS_DERIVED_FROM, + ModelConstants.IS_SOURCE_OF, + ModelConstants.VERSION + ), + ModelConstants.CITES -> OAFRelations( + ModelConstants.CITES, + ModelConstants.IS_CITED_BY, + ModelConstants.CITATION + ), + ModelConstants.IS_CITED_BY -> OAFRelations( + ModelConstants.IS_CITED_BY, + ModelConstants.CITES, + ModelConstants.CITATION + ), + ModelConstants.IS_VARIANT_FORM_OF -> OAFRelations( + ModelConstants.IS_VARIANT_FORM_OF, + ModelConstants.IS_DERIVED_FROM, + ModelConstants.VERSION + ), + ModelConstants.IS_OBSOLETED_BY -> OAFRelations( + ModelConstants.IS_OBSOLETED_BY, + ModelConstants.IS_NEW_VERSION_OF, + ModelConstants.VERSION + ), + ModelConstants.REVIEWS -> OAFRelations( + ModelConstants.REVIEWS, + ModelConstants.IS_REVIEWED_BY, + ModelConstants.REVIEW + ), + ModelConstants.IS_REVIEWED_BY -> OAFRelations( + ModelConstants.IS_REVIEWED_BY, + ModelConstants.REVIEWS, + ModelConstants.REVIEW + ), + ModelConstants.DOCUMENTS -> OAFRelations( + ModelConstants.DOCUMENTS, + ModelConstants.IS_DOCUMENTED_BY, + ModelConstants.RELATIONSHIP + ), + ModelConstants.IS_DOCUMENTED_BY -> OAFRelations( + ModelConstants.IS_DOCUMENTED_BY, + ModelConstants.DOCUMENTS, + ModelConstants.RELATIONSHIP + ), + ModelConstants.COMPILES -> OAFRelations( + ModelConstants.COMPILES, + ModelConstants.IS_COMPILED_BY, + ModelConstants.RELATIONSHIP + ), + ModelConstants.IS_COMPILED_BY -> OAFRelations( + ModelConstants.IS_COMPILED_BY, + ModelConstants.COMPILES, + ModelConstants.RELATIONSHIP + ) ) - val datacite_filter: List[String] = { val stream: InputStream = getClass.getResourceAsStream(DATACITE_FILTER_PATH) - require(stream!= null) + require(stream != null) Source.fromInputStream(stream).getLines().toList } + def dataciteDataInfo(trust: String): DataInfo = OafMapperUtils.dataInfo( + false, + null, + false, + false, + ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, + trust + ) - def dataciteDataInfo(trust: String): DataInfo = OafMapperUtils.dataInfo(false,null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, trust) + val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern( + "[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", + Locale.ENGLISH + ) - val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH) - val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN) + val df_it: DateTimeFormatter = + DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN) val funder_regex: List[(Pattern, String)] = List( - (Pattern.compile("(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda__h2020::"), - (Pattern.compile("(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda_______::") - + ( + Pattern.compile( + "(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", + Pattern.MULTILINE | Pattern.CASE_INSENSITIVE + ), + "40|corda__h2020::" + ), + ( + Pattern.compile( + "(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", + Pattern.MULTILINE | Pattern.CASE_INSENSITIVE + ), + "40|corda_______::" + ) ) val Date_regex: List[Pattern] = List( //Y-M-D - Pattern.compile("(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])", Pattern.MULTILINE), + Pattern.compile( + "(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])", + Pattern.MULTILINE + ), //M-D-Y - Pattern.compile("((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d", Pattern.MULTILINE), + Pattern.compile( + "((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d", + Pattern.MULTILINE + ), //D-M-Y - Pattern.compile("(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})", Pattern.MULTILINE), + Pattern.compile( + "(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})", + Pattern.MULTILINE + ), //Y Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE) ) - } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index a662cf99d..13866d0d0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -20,19 +20,16 @@ import java.time.format.DateTimeFormatter import java.util.{Date, Locale} import scala.collection.JavaConverters._ - object DataciteToOAFTransformation { val mapper = new ObjectMapper() - - /** - * This method should skip record if json contains invalid text - * defined in gile datacite_filter - * - * @param json - * @return True if the record should be skipped - */ + /** This method should skip record if json contains invalid text + * defined in gile datacite_filter + * + * @param json + * @return True if the record should be skipped + */ def skip_record(json: String): Boolean = { datacite_filter.exists(f => json.contains(f)) } @@ -74,35 +71,35 @@ object DataciteToOAFTransformation { } - def embargo_end(embargo_end_date: String): Boolean = { val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]")) val td = LocalDate.now() td.isAfter(dt) } - def extract_date(input: String): Option[String] = { - val d = Date_regex.map(pattern => { - val matcher = pattern.matcher(input) - if (matcher.find()) - matcher.group(0) - else - null - } - ).find(s => s != null) + val d = Date_regex + .map(pattern => { + val matcher = pattern.matcher(input) + if (matcher.find()) + matcher.group(0) + else + null + }) + .find(s => s != null) if (d.isDefined) { val a_date = if (d.get.length == 4) s"01-01-${d.get}" else d.get try { return Some(LocalDate.parse(a_date, df_en).toString) } catch { - case _: Throwable => try { - return Some(LocalDate.parse(a_date, df_it).toString) - } catch { - case _: Throwable => - return None - } + case _: Throwable => + try { + return Some(LocalDate.parse(a_date, df_it).toString) + } catch { + case _: Throwable => + return None + } } } d @@ -118,31 +115,63 @@ object DataciteToOAFTransformation { } } - - def getTypeQualifier(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup): (Qualifier, Qualifier) = { + def getTypeQualifier( + resourceType: String, + resourceTypeGeneral: String, + schemaOrg: String, + vocabularies: VocabularyGroup + ): (Qualifier, Qualifier) = { if (resourceType != null && resourceType.nonEmpty) { - val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType) + val typeQualifier = + vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType) if (typeQualifier != null) - return (typeQualifier, vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid)) + return ( + typeQualifier, + vocabularies.getSynonymAsQualifier( + ModelConstants.DNET_RESULT_TYPOLOGIES, + typeQualifier.getClassid + ) + ) } if (schemaOrg != null && schemaOrg.nonEmpty) { - val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, schemaOrg) + val typeQualifier = + vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, schemaOrg) if (typeQualifier != null) - return (typeQualifier, vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid)) + return ( + typeQualifier, + vocabularies.getSynonymAsQualifier( + ModelConstants.DNET_RESULT_TYPOLOGIES, + typeQualifier.getClassid + ) + ) } if (resourceTypeGeneral != null && resourceTypeGeneral.nonEmpty) { - val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceTypeGeneral) + val typeQualifier = vocabularies.getSynonymAsQualifier( + ModelConstants.DNET_PUBLICATION_RESOURCE, + resourceTypeGeneral + ) if (typeQualifier != null) - return (typeQualifier, vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid)) + return ( + typeQualifier, + vocabularies.getSynonymAsQualifier( + ModelConstants.DNET_RESULT_TYPOLOGIES, + typeQualifier.getClassid + ) + ) } null } - - def getResult(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup): Result = { - val typeQualifiers: (Qualifier, Qualifier) = getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies) + def getResult( + resourceType: String, + resourceTypeGeneral: String, + schemaOrg: String, + vocabularies: VocabularyGroup + ): Result = { + val typeQualifiers: (Qualifier, Qualifier) = + getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies) if (typeQualifiers == null) return null val i = new Instance @@ -168,13 +197,12 @@ object DataciteToOAFTransformation { null } - def available_date(input: String): Boolean = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: org.json4s.JValue = parse(input) val l: List[String] = for { - JObject(dates) <- json \\ "dates" + JObject(dates) <- json \\ "dates" JField("dateType", JString(dateTypes)) <- dates } yield dateTypes @@ -182,18 +210,19 @@ object DataciteToOAFTransformation { } - - /** - * As describe in ticket #6377 - * when the result come from figshare we need to remove subject - * and set Access rights OPEN. - * - * @param r - */ + /** As describe in ticket #6377 + * when the result come from figshare we need to remove subject + * and set Access rights OPEN. + * + * @param r + */ def fix_figshare(r: Result): Unit = { if (r.getInstance() != null) { - val hosted_by_figshare = r.getInstance().asScala.exists(i => i.getHostedby != null && "figshare".equalsIgnoreCase(i.getHostedby.getValue)) + val hosted_by_figshare = r + .getInstance() + .asScala + .exists(i => i.getHostedby != null && "figshare".equalsIgnoreCase(i.getHostedby.getValue)) if (hosted_by_figshare) { r.getInstance().asScala.foreach(i => i.setAccessright(ModelConstants.OPEN_ACCESS_RIGHT())) val l: List[StructuredProperty] = List() @@ -201,10 +230,8 @@ object DataciteToOAFTransformation { } } - } - def createDNetTargetIdentifier(pid: String, pidType: String, idPrefix: String): String = { val f_part = s"$idPrefix|${pidType.toLowerCase}".padTo(15, '_') s"$f_part::${IdentifierFactory.md5(pid.toLowerCase)}" @@ -214,7 +241,13 @@ object DataciteToOAFTransformation { OafMapperUtils.structuredProperty(dt, q, null) } - def generateRelation(sourceId: String, targetId: String, relClass: String, cf: KeyValue, di: DataInfo): Relation = { + def generateRelation( + sourceId: String, + targetId: String, + relClass: String, + cf: KeyValue, + di: DataInfo + ): Relation = { val r = new Relation r.setSource(sourceId) @@ -226,7 +259,6 @@ object DataciteToOAFTransformation { r.setDataInfo(di) r - } def get_projectRelation(awardUri: String, sourceId: String): List[Relation] = { @@ -238,14 +270,18 @@ object DataciteToOAFTransformation { val grantId = m.matcher(awardUri).replaceAll("$2") val targetId = s"$p${DHPUtils.md5(grantId)}" List(generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo)) - } - else + } else List() } - - def generateOAF(input: String, ts: Long, dateOfCollection: Long, vocabularies: VocabularyGroup, exportLinks: Boolean): List[Oaf] = { + def generateOAF( + input: String, + ts: Long, + dateOfCollection: Long, + vocabularies: VocabularyGroup, + exportLinks: Boolean + ): List[Oaf] = { if (skip_record(input)) return List() @@ -253,7 +289,8 @@ object DataciteToOAFTransformation { lazy val json = parse(input) val resourceType = (json \ "attributes" \ "types" \ "resourceType").extractOrElse[String](null) - val resourceTypeGeneral = (json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null) + val resourceTypeGeneral = + (json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null) val schemaOrg = (json \ "attributes" \ "types" \ "schemaOrg").extractOrElse[String](null) val doi = (json \ "attributes" \ "doi").extract[String] @@ -265,8 +302,12 @@ object DataciteToOAFTransformation { if (result == null) return List() - - val doi_q = OafMapperUtils.qualifier("doi", "doi", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES) + val doi_q = OafMapperUtils.qualifier( + "doi", + "doi", + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES + ) val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo) result.setPid(List(pid).asJava) result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true)) @@ -275,48 +316,72 @@ object DataciteToOAFTransformation { val d = new Date(dateOfCollection * 1000) val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) - result.setDateofcollection(ISO8601FORMAT.format(d)) result.setDateoftransformation(ISO8601FORMAT.format(d)) result.setDataInfo(dataInfo) val creators = (json \\ "creators").extractOrElse[List[CreatorType]](List()) - val authors = creators.zipWithIndex.map { case (c, idx) => val a = new Author a.setFullname(c.name.orNull) a.setName(c.givenName.orNull) a.setSurname(c.familyName.orNull) - if (c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null) { - a.setPid(c.nameIdentifiers.get.map(ni => { - val q = if (ni.nameIdentifierScheme.isDefined) vocabularies.getTermAsQualifier(ModelConstants.DNET_PID_TYPES, ni.nameIdentifierScheme.get.toLowerCase()) else null - if (ni.nameIdentifier != null && ni.nameIdentifier.isDefined) { - OafMapperUtils.structuredProperty(ni.nameIdentifier.get, q, dataInfo) - } - else - null + if ( + c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null + ) { + a.setPid( + c.nameIdentifiers.get + .map(ni => { + val q = + if (ni.nameIdentifierScheme.isDefined) + vocabularies.getTermAsQualifier( + ModelConstants.DNET_PID_TYPES, + ni.nameIdentifierScheme.get.toLowerCase() + ) + else null + if (ni.nameIdentifier != null && ni.nameIdentifier.isDefined) { + OafMapperUtils.structuredProperty(ni.nameIdentifier.get, q, dataInfo) + } else + null - } + }) + .asJava ) - .asJava) } if (c.affiliation.isDefined) - a.setAffiliation(c.affiliation.get.filter(af => af.nonEmpty).map(af => OafMapperUtils.field(af, dataInfo)).asJava) + a.setAffiliation( + c.affiliation.get + .filter(af => af.nonEmpty) + .map(af => OafMapperUtils.field(af, dataInfo)) + .asJava + ) a.setRank(idx + 1) a } - val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List()) - result.setTitle(titles.filter(t => t.title.nonEmpty).map(t => { - if (t.titleType.isEmpty) { - OafMapperUtils.structuredProperty(t.title.get, ModelConstants.MAIN_TITLE_QUALIFIER, null) - } else { - OafMapperUtils.structuredProperty(t.title.get, t.titleType.get, t.titleType.get, ModelConstants.DNET_DATACITE_TITLE, ModelConstants.DNET_DATACITE_TITLE, null) - } - }).asJava) + result.setTitle( + titles + .filter(t => t.title.nonEmpty) + .map(t => { + if (t.titleType.isEmpty) { + OafMapperUtils + .structuredProperty(t.title.get, ModelConstants.MAIN_TITLE_QUALIFIER, null) + } else { + OafMapperUtils.structuredProperty( + t.title.get, + t.titleType.get, + t.titleType.get, + ModelConstants.DNET_DATACITE_TITLE, + ModelConstants.DNET_DATACITE_TITLE, + null + ) + } + }) + .asJava + ) if (authors == null || authors.isEmpty || !authors.exists(a => a != null)) return List() @@ -330,53 +395,90 @@ object DataciteToOAFTransformation { .find(d => d.dateType.get.equalsIgnoreCase("issued")) .map(d => extract_date(d.date.get)) val a_date: Option[String] = dates - .filter(d => d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available")) + .filter(d => + d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available") + ) .map(d => extract_date(d.date.get)) .find(d => d != null && d.isDefined) .map(d => d.get) if (a_date.isDefined) { if (doi.startsWith("10.14457")) - result.setEmbargoenddate(OafMapperUtils.field(fix_thai_date(a_date.get, "[yyyy-MM-dd]"), null)) + result.setEmbargoenddate( + OafMapperUtils.field(fix_thai_date(a_date.get, "[yyyy-MM-dd]"), null) + ) else result.setEmbargoenddate(OafMapperUtils.field(a_date.get, null)) } if (i_date.isDefined && i_date.get.isDefined) { if (doi.startsWith("10.14457")) { - result.setDateofacceptance(OafMapperUtils.field(fix_thai_date(i_date.get.get, "[yyyy-MM-dd]"), null)) - result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(fix_thai_date(i_date.get.get, "[yyyy-MM-dd]"), null)) - } - else { + result.setDateofacceptance( + OafMapperUtils.field(fix_thai_date(i_date.get.get, "[yyyy-MM-dd]"), null) + ) + result + .getInstance() + .get(0) + .setDateofacceptance( + OafMapperUtils.field(fix_thai_date(i_date.get.get, "[yyyy-MM-dd]"), null) + ) + } else { result.setDateofacceptance(OafMapperUtils.field(i_date.get.get, null)) result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(i_date.get.get, null)) } - } - else if (publication_year != null) { + } else if (publication_year != null) { if (doi.startsWith("10.14457")) { - result.setDateofacceptance(OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year", "[dd-MM-yyyy]"), null)) - result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year", "[dd-MM-yyyy]"), null)) + result.setDateofacceptance( + OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year", "[dd-MM-yyyy]"), null) + ) + result + .getInstance() + .get(0) + .setDateofacceptance( + OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year", "[dd-MM-yyyy]"), null) + ) } else { result.setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null)) - result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null)) + result + .getInstance() + .get(0) + .setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null)) } } - - result.setRelevantdate(dates.filter(d => d.date.isDefined && d.dateType.isDefined) - .map(d => (extract_date(d.date.get), d.dateType.get)) - .filter(d => d._1.isDefined) - .map(d => (d._1.get, vocabularies.getTermAsQualifier(ModelConstants.DNET_DATACITE_DATE, d._2.toLowerCase()))) - .filter(d => d._2 != null) - .map(d => generateOAFDate(d._1, d._2)).asJava) + result.setRelevantdate( + dates + .filter(d => d.date.isDefined && d.dateType.isDefined) + .map(d => (extract_date(d.date.get), d.dateType.get)) + .filter(d => d._1.isDefined) + .map(d => + ( + d._1.get, + vocabularies.getTermAsQualifier(ModelConstants.DNET_DATACITE_DATE, d._2.toLowerCase()) + ) + ) + .filter(d => d._2 != null) + .map(d => generateOAFDate(d._1, d._2)) + .asJava + ) val subjects = (json \\ "subjects").extract[List[SubjectType]] - result.setSubject(subjects.filter(s => s.subject.nonEmpty) - .map(s => - OafMapperUtils.structuredProperty(s.subject.get, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, null) - ).asJava) - + result.setSubject( + subjects + .filter(s => s.subject.nonEmpty) + .map(s => + OafMapperUtils.structuredProperty( + s.subject.get, + SUBJ_CLASS, + SUBJ_CLASS, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + null + ) + ) + .asJava + ) result.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) @@ -384,66 +486,86 @@ object DataciteToOAFTransformation { result.setDescription( descriptions - .filter(d => d.description.isDefined). - map(d => - OafMapperUtils.field(d.description.get, null) - ).filter(s => s != null).asJava) - + .filter(d => d.description.isDefined) + .map(d => OafMapperUtils.field(d.description.get, null)) + .filter(s => s != null) + .asJava + ) val publisher = (json \\ "publisher").extractOrElse[String](null) if (publisher != null) result.setPublisher(OafMapperUtils.field(publisher, null)) - val language: String = (json \\ "language").extractOrElse[String](null) if (language != null) - result.setLanguage(vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, language)) - + result.setLanguage( + vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, language) + ) val instance = result.getInstance().get(0) val client = (json \ "relationships" \ "client" \\ "id").extractOpt[String] val accessRights: List[String] = for { - JObject(rightsList) <- json \\ "rightsList" + JObject(rightsList) <- json \\ "rightsList" JField("rightsUri", JString(rightsUri)) <- rightsList } yield rightsUri - val aRights: Option[AccessRight] = accessRights.map(r => { - vocabularies.getSynonymAsQualifier(ModelConstants.DNET_ACCESS_MODES, r) - }).find(q => q != null).map(q => { - val a = new AccessRight - a.setClassid(q.getClassid) - a.setClassname(q.getClassname) - a.setSchemeid(q.getSchemeid) - a.setSchemename(q.getSchemename) - a - }) + val aRights: Option[AccessRight] = accessRights + .map(r => { + vocabularies.getSynonymAsQualifier(ModelConstants.DNET_ACCESS_MODES, r) + }) + .find(q => q != null) + .map(q => { + val a = new AccessRight + a.setClassid(q.getClassid) + a.setClassname(q.getClassname) + a.setSchemeid(q.getSchemeid) + a.setSchemename(q.getSchemename) + a + }) - - val access_rights_qualifier = if (aRights.isDefined) aRights.get else OafMapperUtils.accessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + val access_rights_qualifier = + if (aRights.isDefined) aRights.get + else + OafMapperUtils.accessRight( + ModelConstants.UNKNOWN, + ModelConstants.NOT_AVAILABLE, + ModelConstants.DNET_ACCESS_MODES, + ModelConstants.DNET_ACCESS_MODES + ) if (client.isDefined) { - instance.setHostedby(OafMapperUtils.keyValue(generateDSId(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID), ModelConstants.UNKNOWN_REPOSITORY.getValue)) + instance.setHostedby( + OafMapperUtils.keyValue( + generateDSId(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID), + ModelConstants.UNKNOWN_REPOSITORY.getValue + ) + ) instance.setCollectedfrom(DATACITE_COLLECTED_FROM) instance.setUrl(List(s"https://dx.doi.org/$doi").asJava) instance.setAccessright(access_rights_qualifier) instance.setPid(result.getPid) val license = accessRights - .find(r => r.startsWith("http") && r.matches(".*(/licenses|/publicdomain|unlicense\\.org/|/legal-and-data-protection-notices|/download/license|/open-government-licence).*")) + .find(r => + r.startsWith("http") && r.matches( + ".*(/licenses|/publicdomain|unlicense\\.org/|/legal-and-data-protection-notices|/download/license|/open-government-licence).*" + ) + ) if (license.isDefined) instance.setLicense(OafMapperUtils.field(license.get, null)) } val awardUris: List[String] = for { - JObject(fundingReferences) <- json \\ "fundingReferences" + JObject(fundingReferences) <- json \\ "fundingReferences" JField("awardUri", JString(awardUri)) <- fundingReferences } yield awardUri result.setId(IdentifierFactory.createIdentifier(result)) - var relations: List[Relation] = awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null) + var relations: List[Relation] = + awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null) fix_figshare(result) @@ -452,28 +574,35 @@ object DataciteToOAFTransformation { if (exportLinks) { val rels: List[RelatedIdentifierType] = for { - JObject(relIdentifier) <- json \\ "relatedIdentifiers" - JField("relationType", JString(relationType)) <- relIdentifier + JObject(relIdentifier) <- json \\ "relatedIdentifiers" + JField("relationType", JString(relationType)) <- relIdentifier JField("relatedIdentifierType", JString(relatedIdentifierType)) <- relIdentifier - JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier + JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier } yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType) - relations = relations ::: generateRelations(rels, result.getId, if (i_date.isDefined && i_date.get.isDefined) i_date.get.get else null) + relations = relations ::: generateRelations( + rels, + result.getId, + if (i_date.isDefined && i_date.get.isDefined) i_date.get.get else null + ) } if (relations != null && relations.nonEmpty) { List(result) ::: relations - } - else + } else List(result) } - private def generateRelations(rels: List[RelatedIdentifierType], id: String, date: String): List[Relation] = { + private def generateRelations( + rels: List[RelatedIdentifierType], + id: String, + date: String + ): List[Relation] = { rels .filter(r => - subRelTypeMapping.contains(r.relationType) && ( - r.relatedIdentifierType.equalsIgnoreCase("doi") || - r.relatedIdentifierType.equalsIgnoreCase("pmid") || - r.relatedIdentifierType.equalsIgnoreCase("arxiv")) + subRelTypeMapping + .contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") || + r.relatedIdentifierType.equalsIgnoreCase("pmid") || + r.relatedIdentifierType.equalsIgnoreCase("arxiv")) ) .map(r => { val rel = new Relation @@ -490,19 +619,19 @@ object DataciteToOAFTransformation { rel.setProperties(List(dateProps).asJava) rel.setSource(id) - rel.setTarget(DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)) + rel.setTarget( + DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType) + ) rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) rel.getCollectedfrom.asScala.map(c => c.getValue).toList rel }) } - def generateDSId(input: String): String = { val b = StringUtils.substringBefore(input, "::") val a = StringUtils.substringAfter(input, "::") s"10|$b::${DHPUtils.md5(a)}" } - } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala index a205edcf2..046290969 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala @@ -12,12 +12,12 @@ import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.spark.sql.{Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} +class GenerateDataciteDatasetSpark(propertyPath: String, args: Array[String], log: Logger) + extends AbstractScalaApplication(propertyPath, args, log: Logger) { -class GenerateDataciteDatasetSpark (propertyPath:String, args:Array[String], log:Logger) extends AbstractScalaApplication(propertyPath, args, log:Logger) { - /** - * Here all the spark applications runs this method - * where the whole logic of the spark node is defined - */ + /** Here all the spark applications runs this method + * where the whole logic of the spark node is defined + */ override def run(): Unit = { val sourcePath = parser.get("sourcePath") @@ -46,49 +46,65 @@ class GenerateDataciteDatasetSpark (propertyPath:String, args:Array[String], log reportTotalSize(targetPath, outputBasePath) } - - /** - * For working with MDStore we need to store in a file on hdfs the size of - * the current dataset - * @param targetPath - * @param outputBasePath - */ - def reportTotalSize( targetPath: String, outputBasePath: String ):Unit = { + /** For working with MDStore we need to store in a file on hdfs the size of + * the current dataset + * @param targetPath + * @param outputBasePath + */ + def reportTotalSize(targetPath: String, outputBasePath: String): Unit = { val total_items = spark.read.text(targetPath).count() - writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$total_items", outputBasePath + MDSTORE_SIZE_PATH) + writeHdfsFile( + spark.sparkContext.hadoopConfiguration, + s"$total_items", + outputBasePath + MDSTORE_SIZE_PATH + ) } - /** - * Generate the transformed and cleaned OAF Dataset from the native one - - * @param sourcePath sourcePath of the native Dataset in format JSON/Datacite - * @param exportLinks If true it generates unresolved links - * @param vocabularies vocabularies for cleaning - * @param targetPath the targetPath of the result Dataset - */ - def generateDataciteDataset(sourcePath: String, exportLinks: Boolean, vocabularies: VocabularyGroup, targetPath: String, spark:SparkSession):Unit = { - require(spark!= null) + /** Generate the transformed and cleaned OAF Dataset from the native one + * + * @param sourcePath sourcePath of the native Dataset in format JSON/Datacite + * @param exportLinks If true it generates unresolved links + * @param vocabularies vocabularies for cleaning + * @param targetPath the targetPath of the result Dataset + */ + def generateDataciteDataset( + sourcePath: String, + exportLinks: Boolean, + vocabularies: VocabularyGroup, + targetPath: String, + spark: SparkSession + ): Unit = { + require(spark != null) import spark.implicits._ implicit val mrEncoder: Encoder[MetadataRecord] = Encoders.kryo[MetadataRecord] implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] CollectionUtils.saveDataset( - spark.read.load(sourcePath).as[DataciteType] + spark.read + .load(sourcePath) + .as[DataciteType] .filter(d => d.isActive) - .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks)) + .flatMap(d => + DataciteToOAFTransformation + .generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks) + ) .filter(d => d != null), - targetPath) + targetPath + ) } } - object GenerateDataciteDatasetSpark { val log: Logger = LoggerFactory.getLogger(GenerateDataciteDatasetSpark.getClass) def main(args: Array[String]): Unit = { - new GenerateDataciteDatasetSpark("/eu/dnetlib/dhp/datacite/generate_dataset_params.json", args, log).initialize().run() + new GenerateDataciteDatasetSpark( + "/eu/dnetlib/dhp/datacite/generate_dataset_params.json", + args, + log + ).initialize().run() } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala index 018b4958a..cb021925a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala @@ -22,7 +22,6 @@ object ImportDatacite { val log: Logger = LoggerFactory.getLogger(ImportDatacite.getClass) - def convertAPIStringToDataciteItem(input: String): DataciteType = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: org.json4s.JValue = parse(input) @@ -32,14 +31,26 @@ object ImportDatacite { val timestamp_string = (json \ "attributes" \ "updated").extract[String] val dt = LocalDateTime.parse(timestamp_string, ISO_DATE_TIME) - DataciteType(doi = doi, timestamp = dt.toInstant(ZoneOffset.UTC).toEpochMilli / 1000, isActive = isActive, json = input) + DataciteType( + doi = doi, + timestamp = dt.toInstant(ZoneOffset.UTC).toEpochMilli / 1000, + isActive = isActive, + json = input + ) } - def main(args: Array[String]): Unit = { - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/import_from_api.json")).mkString) + val parser = new ArgumentApplicationParser( + Source + .fromInputStream( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/datacite/import_from_api.json" + ) + ) + .mkString + ) parser.parseArgument(args) val master = parser.get("master") @@ -60,7 +71,8 @@ object ImportDatacite { val spkipImport = parser.get("skipImport") log.info(s"skipImport is $spkipImport") - val spark: SparkSession = SparkSession.builder() + val spark: SparkSession = SparkSession + .builder() .appName(ImportDatacite.getClass.getSimpleName) .master(master) .getOrCreate() @@ -78,45 +90,48 @@ object ImportDatacite { import spark.implicits._ + val dataciteAggregator: Aggregator[DataciteType, DataciteType, DataciteType] = + new Aggregator[DataciteType, DataciteType, DataciteType] with Serializable { - val dataciteAggregator: Aggregator[DataciteType, DataciteType, DataciteType] = new Aggregator[DataciteType, DataciteType, DataciteType] with Serializable { + override def zero: DataciteType = null - override def zero: DataciteType = null - - override def reduce(a: DataciteType, b: DataciteType): DataciteType = { - if (b == null) - return a - if (a == null) - return b - if (a.timestamp > b.timestamp) { - return a + override def reduce(a: DataciteType, b: DataciteType): DataciteType = { + if (b == null) + return a + if (a == null) + return b + if (a.timestamp > b.timestamp) { + return a + } + b } - b + + override def merge(a: DataciteType, b: DataciteType): DataciteType = { + reduce(a, b) + } + + override def bufferEncoder: Encoder[DataciteType] = implicitly[Encoder[DataciteType]] + + override def outputEncoder: Encoder[DataciteType] = implicitly[Encoder[DataciteType]] + + override def finish(reduction: DataciteType): DataciteType = reduction } - override def merge(a: DataciteType, b: DataciteType): DataciteType = { - reduce(a, b) - } - - override def bufferEncoder: Encoder[DataciteType] = implicitly[Encoder[DataciteType]] - - override def outputEncoder: Encoder[DataciteType] = implicitly[Encoder[DataciteType]] - - override def finish(reduction: DataciteType): DataciteType = reduction - } - val dump: Dataset[DataciteType] = spark.read.load(dataciteDump).as[DataciteType] val ts = dump.select(max("timestamp")).first().getLong(0) println(s"last Timestamp is $ts") - val cnt = if ("true".equalsIgnoreCase(spkipImport)) 1 else writeSequenceFile(hdfsTargetPath, ts, conf, bs) + val cnt = + if ("true".equalsIgnoreCase(spkipImport)) 1 + else writeSequenceFile(hdfsTargetPath, ts, conf, bs) println(s"Imported from Datacite API $cnt documents") if (cnt > 0) { - val inputRdd: RDD[DataciteType] = sc.sequenceFile(targetPath, classOf[Int], classOf[Text]) + val inputRdd: RDD[DataciteType] = sc + .sequenceFile(targetPath, classOf[Int], classOf[Text]) .map(s => s._2.toString) .map(s => convertAPIStringToDataciteItem(s)) spark.createDataset(inputRdd).write.mode(SaveMode.Overwrite).save(s"${targetPath}_dataset") @@ -129,7 +144,9 @@ object ImportDatacite { .agg(dataciteAggregator.toColumn) .map(s => s._2) .repartition(4000) - .write.mode(SaveMode.Overwrite).save(s"${dataciteDump}_updated") + .write + .mode(SaveMode.Overwrite) + .save(s"${dataciteDump}_updated") val fs = FileSystem.get(sc.hadoopConfiguration) fs.delete(new Path(s"$dataciteDump"), true) @@ -137,14 +154,24 @@ object ImportDatacite { } } - private def writeSequenceFile(hdfsTargetPath: Path, timestamp: Long, conf: Configuration, bs: Int): Long = { + private def writeSequenceFile( + hdfsTargetPath: Path, + timestamp: Long, + conf: Configuration, + bs: Int + ): Long = { var from: Long = timestamp * 1000 val delta: Long = 100000000L var client: DataciteAPIImporter = null val now: Long = System.currentTimeMillis() var i = 0 try { - val writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(hdfsTargetPath), SequenceFile.Writer.keyClass(classOf[IntWritable]), SequenceFile.Writer.valueClass(classOf[Text])) + val writer = SequenceFile.createWriter( + conf, + SequenceFile.Writer.file(hdfsTargetPath), + SequenceFile.Writer.keyClass(classOf[IntWritable]), + SequenceFile.Writer.valueClass(classOf[Text]) + ) try { var start: Long = System.currentTimeMillis while (from < now) { @@ -153,16 +180,16 @@ object ImportDatacite { val key: IntWritable = new IntWritable(i) val value: Text = new Text while (client.hasNext) { - key.set({ + key.set { i += 1; i - 1 - }) + } value.set(client.next()) writer.append(key, value) writer.hflush() if (i % 1000 == 0) { end = System.currentTimeMillis - val time = (end - start) / 1000.0F + val time = (end - start) / 1000.0f println(s"Imported $i in $time seconds") start = System.currentTimeMillis } @@ -174,8 +201,7 @@ object ImportDatacite { case e: Throwable => println("Error", e) } finally if (writer != null) writer.close() - } - catch { + } catch { case e: Throwable => log.error("Error", e) } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala index d46e5423d..3e61edf02 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala @@ -17,7 +17,13 @@ object SparkDownloadUpdateDatacite { def main(args: Array[String]): Unit = { val conf = new SparkConf - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/datacite/generate_dataset_params.json")).mkString) + val parser = new ArgumentApplicationParser( + Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/datacite/generate_dataset_params.json") + ) + .mkString + ) parser.parseArgument(args) val master = parser.get("master") val sourcePath = parser.get("sourcePath") @@ -26,8 +32,9 @@ object SparkDownloadUpdateDatacite { val hdfsuri = parser.get("namenode") log.info(s"namenode is $hdfsuri") - - val spark: SparkSession = SparkSession.builder().config(conf) + val spark: SparkSession = SparkSession + .builder() + .config(conf) .appName(getClass.getSimpleName) .master(master) .getOrCreate() @@ -37,13 +44,18 @@ object SparkDownloadUpdateDatacite { import spark.implicits._ - - val maxDate: String = spark.read.load(workingPath).as[Oaf].filter(s => s.isInstanceOf[Result]).map(r => r.asInstanceOf[Result].getDateofcollection).select(max("value")).first().getString(0) + val maxDate: String = spark.read + .load(workingPath) + .as[Oaf] + .filter(s => s.isInstanceOf[Result]) + .map(r => r.asInstanceOf[Result].getDateofcollection) + .select(max("value")) + .first() + .getString(0) val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) val string_to_date = ISO8601FORMAT.parse(maxDate) val ts = string_to_date.getTime - } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala index 853b24862..ffdab1799 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala @@ -12,39 +12,81 @@ object BioDBToOAF { case class EBILinkItem(id: Long, links: String) {} - case class EBILinks(relType: String, date: String, title: String, pmid: String, targetPid: String, targetPidType: String, targetUrl: String) {} + case class EBILinks( + relType: String, + date: String, + title: String, + pmid: String, + targetPid: String, + targetPidType: String, + targetUrl: String + ) {} case class UniprotDate(date: String, date_info: String) {} - case class ScholixResolved(pid: String, pidType: String, typology: String, tilte: List[String], datasource: List[String], date: List[String], authors: List[String]) {} + case class ScholixResolved( + pid: String, + pidType: String, + typology: String, + tilte: List[String], + datasource: List[String], + date: List[String], + authors: List[String] + ) {} - val DATA_INFO: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9") + val DATA_INFO: DataInfo = OafMapperUtils.dataInfo( + false, + null, + false, + false, + ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, + "0.9" + ) val SUBJ_CLASS = "Keywords" val DATE_RELATION_KEY = "RelationDate" val resolvedURL: Map[String, String] = Map( - "genbank" -> "https://www.ncbi.nlm.nih.gov/nuccore/", - "ncbi-n" -> "https://www.ncbi.nlm.nih.gov/nuccore/", - "ncbi-wgs" -> "https://www.ncbi.nlm.nih.gov/nuccore/", - "ncbi-p" -> "https://www.ncbi.nlm.nih.gov/protein/", - "ena" -> "https://www.ebi.ac.uk/ena/browser/view/", + "genbank" -> "https://www.ncbi.nlm.nih.gov/nuccore/", + "ncbi-n" -> "https://www.ncbi.nlm.nih.gov/nuccore/", + "ncbi-wgs" -> "https://www.ncbi.nlm.nih.gov/nuccore/", + "ncbi-p" -> "https://www.ncbi.nlm.nih.gov/protein/", + "ena" -> "https://www.ebi.ac.uk/ena/browser/view/", "clinicaltrials.gov" -> "https://clinicaltrials.gov/ct2/show/", - "onim" -> "https://omim.org/entry/", - "refseq" -> "https://www.ncbi.nlm.nih.gov/nuccore/", - "geo" -> "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=" + "onim" -> "https://omim.org/entry/", + "refseq" -> "https://www.ncbi.nlm.nih.gov/nuccore/", + "geo" -> "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=" ) - val collectedFromMap: Map[String, KeyValue] = { - val PDBCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|opendoar____::d1c373ab1570cfb9a7dbb53c186b37a2", "Protein Data Bank") - val enaCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|re3data_____::c2a591f440598b63d854556beaf01591", "European Nucleotide Archive") - val ncbiCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|re3data_____::7d4f90870fe1e493232c9e86c43ae6f6", "NCBI Nucleotide") - val UNIPROTCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|re3data_____::296e1abaf1302897a6838d3588cd0310", "UniProtKB/Swiss-Prot") - val ElsevierCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|openaire____::8f87e10869299a5fe80b315695296b88", "Elsevier") - val springerNatureCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|openaire____::6e380d9cf51138baec8480f5a0ce3a2e", "Springer Nature") - val EBICollectedFrom: KeyValue = OafMapperUtils.keyValue("10|opendoar____::83e60e09c222f206c725385f53d7e567c", "EMBL-EBIs Protein Data Bank in Europe (PDBe)") - val pubmedCollectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") + val PDBCollectedFrom: KeyValue = OafMapperUtils.keyValue( + "10|opendoar____::d1c373ab1570cfb9a7dbb53c186b37a2", + "Protein Data Bank" + ) + val enaCollectedFrom: KeyValue = OafMapperUtils.keyValue( + "10|re3data_____::c2a591f440598b63d854556beaf01591", + "European Nucleotide Archive" + ) + val ncbiCollectedFrom: KeyValue = OafMapperUtils.keyValue( + "10|re3data_____::7d4f90870fe1e493232c9e86c43ae6f6", + "NCBI Nucleotide" + ) + val UNIPROTCollectedFrom: KeyValue = OafMapperUtils.keyValue( + "10|re3data_____::296e1abaf1302897a6838d3588cd0310", + "UniProtKB/Swiss-Prot" + ) + val ElsevierCollectedFrom: KeyValue = + OafMapperUtils.keyValue("10|openaire____::8f87e10869299a5fe80b315695296b88", "Elsevier") + val springerNatureCollectedFrom: KeyValue = OafMapperUtils.keyValue( + "10|openaire____::6e380d9cf51138baec8480f5a0ce3a2e", + "Springer Nature" + ) + val EBICollectedFrom: KeyValue = OafMapperUtils.keyValue( + "10|opendoar____::83e60e09c222f206c725385f53d7e567c", + "EMBL-EBIs Protein Data Bank in Europe (PDBe)" + ) + val pubmedCollectedFrom: KeyValue = + OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") UNIPROTCollectedFrom.setDataInfo(DATA_INFO) PDBCollectedFrom.setDataInfo(DATA_INFO) @@ -56,14 +98,14 @@ object BioDBToOAF { springerNatureCollectedFrom.setDataInfo(DATA_INFO) Map( - "uniprot" -> UNIPROTCollectedFrom, - "pdb" -> PDBCollectedFrom, - "elsevier" -> ElsevierCollectedFrom, - "ebi" -> EBICollectedFrom, - "Springer Nature" -> springerNatureCollectedFrom, - "NCBI Nucleotide" -> ncbiCollectedFrom, + "uniprot" -> UNIPROTCollectedFrom, + "pdb" -> PDBCollectedFrom, + "elsevier" -> ElsevierCollectedFrom, + "ebi" -> EBICollectedFrom, + "Springer Nature" -> springerNatureCollectedFrom, + "NCBI Nucleotide" -> ncbiCollectedFrom, "European Nucleotide Archive" -> enaCollectedFrom, - "Europe PMC" -> pubmedCollectedFrom + "Europe PMC" -> pubmedCollectedFrom ) } @@ -80,18 +122,32 @@ object BioDBToOAF { val date = GraphCleaningFunctions.cleanDate((json \ "LinkedPublicationDate").extract[String]) - createRelation(target_pid, target_pid_type, generate_unresolved_id(source_pid, source_pid_type), collectedFromMap("elsevier"), "relationship", relation_semantic, date) + createRelation( + target_pid, + target_pid_type, + generate_unresolved_id(source_pid, source_pid_type), + collectedFromMap("elsevier"), + "relationship", + relation_semantic, + date + ) } - def scholixResolvedToOAF(input: ScholixResolved): Oaf = { val d = new Dataset d.setPid( List( - OafMapperUtils.structuredProperty(input.pid.toLowerCase, input.pidType.toLowerCase, input.pidType.toLowerCase, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, DATA_INFO) + OafMapperUtils.structuredProperty( + input.pid.toLowerCase, + input.pidType.toLowerCase, + input.pidType.toLowerCase, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES, + DATA_INFO + ) ).asJava ) @@ -101,7 +157,15 @@ object BioDBToOAF { d.setId(OafMapperUtils.createOpenaireId(50, s"$nsPrefix::${input.pid.toLowerCase}", true)) if (input.tilte != null && input.tilte.nonEmpty) - d.setTitle(List(OafMapperUtils.structuredProperty(input.tilte.head, ModelConstants.MAIN_TITLE_QUALIFIER, DATA_INFO)).asJava) + d.setTitle( + List( + OafMapperUtils.structuredProperty( + input.tilte.head, + ModelConstants.MAIN_TITLE_QUALIFIER, + DATA_INFO + ) + ).asJava + ) d.setOriginalId(List(input.pid).asJava) val i = new Instance @@ -113,9 +177,23 @@ object BioDBToOAF { } if (input.pidType.equalsIgnoreCase("clinicaltrials.gov")) - i.setInstancetype(OafMapperUtils.qualifier("0037", "Clinical Trial", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) + i.setInstancetype( + OafMapperUtils.qualifier( + "0037", + "Clinical Trial", + ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE + ) + ) else - i.setInstancetype(OafMapperUtils.qualifier("0046", "Bioentity", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) + i.setInstancetype( + OafMapperUtils.qualifier( + "0046", + "Bioentity", + ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE + ) + ) if (input.datasource == null || input.datasource.isEmpty) return null @@ -141,7 +219,6 @@ object BioDBToOAF { d } - def uniprotToOAF(input: String): List[Oaf] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json = parse(input) @@ -151,7 +228,14 @@ object BioDBToOAF { d.setPid( List( - OafMapperUtils.structuredProperty(pid, "uniprot", "uniprot", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, DATA_INFO) + OafMapperUtils.structuredProperty( + pid, + "uniprot", + "uniprot", + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES, + DATA_INFO + ) ).asJava ) @@ -162,32 +246,52 @@ object BioDBToOAF { val title: String = (json \ "title").extractOrElse[String](null) if (title != null) - d.setTitle(List(OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, DATA_INFO)).asJava) + d.setTitle( + List( + OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, DATA_INFO) + ).asJava + ) d.setOriginalId(List(pid).asJava) val i = new Instance i.setPid(d.getPid) i.setUrl(List(s"https://www.uniprot.org/uniprot/$pid").asJava) - i.setInstancetype(OafMapperUtils.qualifier("0046", "Bioentity", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) + i.setInstancetype( + OafMapperUtils.qualifier( + "0046", + "Bioentity", + ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE + ) + ) i.setCollectedfrom(collectedFromMap("uniprot")) d.setInstance(List(i).asJava) val dates: List[UniprotDate] = for { - JObject(dateOBJ) <- json \ "dates" - JField("date", JString(date)) <- dateOBJ + JObject(dateOBJ) <- json \ "dates" + JField("date", JString(date)) <- dateOBJ JField("date_info", JString(date_info)) <- dateOBJ } yield UniprotDate(GraphCleaningFunctions.cleanDate(date), date_info) val subjects: List[String] = (json \\ "subjects").extractOrElse[List[String]](null) - if (subjects != null) { d.setSubject( - subjects.map(s => - OafMapperUtils.structuredProperty(s, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, null) - ).asJava) + subjects + .map(s => + OafMapperUtils.structuredProperty( + s, + SUBJ_CLASS, + SUBJ_CLASS, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + null + ) + ) + .asJava + ) } var i_date: Option[UniprotDate] = None @@ -197,45 +301,73 @@ object BioDBToOAF { i.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) } - val relevant_dates: List[StructuredProperty] = dates.filter(d => !d.date_info.contains("entry version")) - .map(date => OafMapperUtils.structuredProperty(date.date, ModelConstants.UNKNOWN, ModelConstants.UNKNOWN, ModelConstants.DNET_DATACITE_DATE, ModelConstants.DNET_DATACITE_DATE, DATA_INFO)) + val relevant_dates: List[StructuredProperty] = dates + .filter(d => !d.date_info.contains("entry version")) + .map(date => + OafMapperUtils.structuredProperty( + date.date, + ModelConstants.UNKNOWN, + ModelConstants.UNKNOWN, + ModelConstants.DNET_DATACITE_DATE, + ModelConstants.DNET_DATACITE_DATE, + DATA_INFO + ) + ) if (relevant_dates != null && relevant_dates.nonEmpty) d.setRelevantdate(relevant_dates.asJava) d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) } - val references_pmid: List[String] = for { - JObject(reference) <- json \ "references" + JObject(reference) <- json \ "references" JField("PubMed", JString(pid)) <- reference } yield pid val references_doi: List[String] = for { - JObject(reference) <- json \ "references" + JObject(reference) <- json \ "references" JField(" DOI", JString(pid)) <- reference } yield pid - if (references_pmid != null && references_pmid.nonEmpty) { - val rel = createRelation(references_pmid.head, "pmid", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null) + val rel = createRelation( + references_pmid.head, + "pmid", + d.getId, + collectedFromMap("uniprot"), + ModelConstants.RELATIONSHIP, + ModelConstants.IS_RELATED_TO, + if (i_date.isDefined) i_date.get.date else null + ) rel.getCollectedfrom List(d, rel) - } - else if (references_doi != null && references_doi.nonEmpty) { - val rel = createRelation(references_doi.head, "doi", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null) + } else if (references_doi != null && references_doi.nonEmpty) { + val rel = createRelation( + references_doi.head, + "doi", + d.getId, + collectedFromMap("uniprot"), + ModelConstants.RELATIONSHIP, + ModelConstants.IS_RELATED_TO, + if (i_date.isDefined) i_date.get.date else null + ) List(d, rel) - } - else + } else List(d) } - def generate_unresolved_id(pid: String, pidType: String): String = { s"unresolved::$pid::$pidType" } - - def createRelation(pid: String, pidType: String, sourceId: String, collectedFrom: KeyValue, subRelType: String, relClass: String, date: String): Relation = { + def createRelation( + pid: String, + pidType: String, + sourceId: String, + collectedFrom: KeyValue, + subRelType: String, + relClass: String, + date: String + ): Relation = { val rel = new Relation rel.setCollectedfrom(List(collectedFromMap("pdb")).asJava) @@ -248,7 +380,6 @@ object BioDBToOAF { rel.setSource(sourceId) rel.setTarget(s"unresolved::$pid::$pidType") - val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date) rel.setProperties(List(dateProps).asJava) @@ -259,12 +390,24 @@ object BioDBToOAF { } - - def createSupplementaryRelation(pid: String, pidType: String, sourceId: String, collectedFrom: KeyValue, date: String): Relation = { - createRelation(pid, pidType, sourceId, collectedFrom, ModelConstants.SUPPLEMENT, ModelConstants.IS_SUPPLEMENT_TO, date) + def createSupplementaryRelation( + pid: String, + pidType: String, + sourceId: String, + collectedFrom: KeyValue, + date: String + ): Relation = { + createRelation( + pid, + pidType, + sourceId, + collectedFrom, + ModelConstants.SUPPLEMENT, + ModelConstants.IS_SUPPLEMENT_TO, + date + ) } - def pdbTOOaf(input: String): List[Oaf] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json = parse(input) @@ -277,7 +420,14 @@ object BioDBToOAF { d.setPid( List( - OafMapperUtils.structuredProperty(pdb, "pdb", "Protein Data Bank Identifier", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, DATA_INFO) + OafMapperUtils.structuredProperty( + pdb, + "pdb", + "Protein Data Bank Identifier", + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES, + DATA_INFO + ) ).asJava ) @@ -290,13 +440,16 @@ object BioDBToOAF { if (title == null) return List() - d.setTitle(List(OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, DATA_INFO)).asJava) + d.setTitle( + List( + OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, DATA_INFO) + ).asJava + ) val authors: List[String] = (json \ "authors").extractOrElse[List[String]](null) if (authors != null) { val convertedAuthors = authors.zipWithIndex.map { a => - val res = new Author res.setFullname(a._1) res.setRank(a._2 + 1) @@ -310,7 +463,14 @@ object BioDBToOAF { i.setPid(d.getPid) i.setUrl(List(s"https://www.rcsb.org/structure/$pdb").asJava) - i.setInstancetype(OafMapperUtils.qualifier("0046", "Bioentity", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) + i.setInstancetype( + OafMapperUtils.qualifier( + "0046", + "Bioentity", + ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE + ) + ) i.setCollectedfrom(collectedFromMap("pdb")) d.setInstance(List(i).asJava) @@ -323,7 +483,6 @@ object BioDBToOAF { List(d) } - def extractEBILinksFromDump(input: String): EBILinkItem = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json = parse(input) @@ -333,49 +492,70 @@ object BioDBToOAF { EBILinkItem(pmid.toLong, compact(render(links))) } - def EBITargetLinksFilter(input: EBILinks): Boolean = { - input.targetPidType.equalsIgnoreCase("ena") || input.targetPidType.equalsIgnoreCase("pdb") || input.targetPidType.equalsIgnoreCase("uniprot") + input.targetPidType.equalsIgnoreCase("ena") || input.targetPidType.equalsIgnoreCase( + "pdb" + ) || input.targetPidType.equalsIgnoreCase("uniprot") } - def parse_ebi_links(input: String): List[EBILinks] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json = parse(input) val pmid = (json \ "request" \ "id").extract[String] for { - JObject(link) <- json \\ "Link" - JField("Target", JObject(target)) <- link - JField("RelationshipType", JObject(relType)) <- link - JField("Name", JString(relation)) <- relType + JObject(link) <- json \\ "Link" + JField("Target", JObject(target)) <- link + JField("RelationshipType", JObject(relType)) <- link + JField("Name", JString(relation)) <- relType JField("PublicationDate", JString(publicationDate)) <- link - JField("Title", JString(title)) <- target - JField("Identifier", JObject(identifier)) <- target - JField("IDScheme", JString(idScheme)) <- identifier - JField("IDURL", JString(idUrl)) <- identifier - JField("ID", JString(id)) <- identifier + JField("Title", JString(title)) <- target + JField("Identifier", JObject(identifier)) <- target + JField("IDScheme", JString(idScheme)) <- identifier + JField("IDURL", JString(idUrl)) <- identifier + JField("ID", JString(id)) <- identifier - } yield EBILinks(relation, GraphCleaningFunctions.cleanDate(publicationDate), title, pmid, id, idScheme, idUrl) + } yield EBILinks( + relation, + GraphCleaningFunctions.cleanDate(publicationDate), + title, + pmid, + id, + idScheme, + idUrl + ) } - def convertEBILinksToOaf(input: EBILinks): List[Oaf] = { val d = new Dataset d.setCollectedfrom(List(collectedFromMap("ebi")).asJava) d.setDataInfo(DATA_INFO) - d.setTitle(List(OafMapperUtils.structuredProperty(input.title, ModelConstants.MAIN_TITLE_QUALIFIER, DATA_INFO)).asJava) + d.setTitle( + List( + OafMapperUtils.structuredProperty( + input.title, + ModelConstants.MAIN_TITLE_QUALIFIER, + DATA_INFO + ) + ).asJava + ) val nsPrefix = input.targetPidType.toLowerCase.padTo(12, '_') d.setId(OafMapperUtils.createOpenaireId(50, s"$nsPrefix::${input.targetPid.toLowerCase}", true)) d.setOriginalId(List(input.targetPid.toLowerCase).asJava) - d.setPid( List( - OafMapperUtils.structuredProperty(input.targetPid.toLowerCase, input.targetPidType.toLowerCase, "Protein Data Bank Identifier", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, DATA_INFO) + OafMapperUtils.structuredProperty( + input.targetPid.toLowerCase, + input.targetPidType.toLowerCase, + "Protein Data Bank Identifier", + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES, + DATA_INFO + ) ).asJava ) @@ -383,13 +563,35 @@ object BioDBToOAF { i.setPid(d.getPid) i.setUrl(List(input.targetUrl).asJava) - i.setInstancetype(OafMapperUtils.qualifier("0046", "Bioentity", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) + i.setInstancetype( + OafMapperUtils.qualifier( + "0046", + "Bioentity", + ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE + ) + ) i.setCollectedfrom(collectedFromMap("ebi")) d.setInstance(List(i).asJava) - i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)) - d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)) + i.setDateofacceptance( + OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO) + ) + d.setDateofacceptance( + OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO) + ) - List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, GraphCleaningFunctions.cleanDate(input.date))) + List( + d, + createRelation( + input.pmid, + "pmid", + d.getId, + collectedFromMap("ebi"), + ModelConstants.RELATIONSHIP, + ModelConstants.IS_RELATED_TO, + GraphCleaningFunctions.cleanDate(input.date) + ) + ) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala index 27caa8f36..96075b4f3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala @@ -14,7 +14,11 @@ object SparkTransformBioDatabaseToOAF { def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() val log: Logger = LoggerFactory.getLogger(getClass) - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/bio_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/bio_to_oaf_params.json") + ) + ) parser.parseArgument(args) val database: String = parser.get("database") log.info("database: {}", database) @@ -29,20 +33,33 @@ object SparkTransformBioDatabaseToOAF { .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val sc = spark.sparkContext implicit val resultEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) import spark.implicits._ database.toUpperCase() match { case "UNIPROT" => - CollectionUtils.saveDataset(spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))), targetPath) + CollectionUtils.saveDataset( + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))), + targetPath + ) case "PDB" => - CollectionUtils.saveDataset(spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))), targetPath) + CollectionUtils.saveDataset( + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))), + targetPath + ) case "SCHOLIX" => - CollectionUtils.saveDataset(spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)), targetPath) + CollectionUtils.saveDataset( + spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)), + targetPath + ) case "CROSSREF_LINKS" => - CollectionUtils.saveDataset(spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))), targetPath) + CollectionUtils.saveDataset( + spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))), + targetPath + ) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 0fea4ff7f..9c55ec7be 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -24,31 +24,37 @@ import scala.xml.pull.XMLEventReader object SparkCreateBaselineDataFrame { - def requestBaseLineUpdatePage(maxFile: String): List[(String, String)] = { val data = requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/") - val result = data.lines.filter(l => l.startsWith("
") - val start = l.indexOf(" l.startsWith("") + val start = l.indexOf("= 0 && end > start) - l.substring(start + 9, end - start) - else - "" - }.filter(s => s.endsWith(".gz")).filter(s => s > maxFile).map(s => (s, s"https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/$s")).toList + if (start >= 0 && end > start) + l.substring(start + 9, end - start) + else + "" + } + .filter(s => s.endsWith(".gz")) + .filter(s => s > maxFile) + .map(s => (s, s"https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/$s")) + .toList result } - def downloadBaselinePart(url: String): InputStream = { val r = new HttpGet(url) val timeout = 60; // seconds - val config = RequestConfig.custom() + val config = RequestConfig + .custom() .setConnectTimeout(timeout * 1000) .setConnectionRequestTimeout(timeout * 1000) - .setSocketTimeout(timeout * 1000).build() + .setSocketTimeout(timeout * 1000) + .build() val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() val response = client.execute(r) println(s"get response with status${response.getStatusLine.getStatusCode}") @@ -59,10 +65,12 @@ object SparkCreateBaselineDataFrame { def requestPage(url: String): String = { val r = new HttpGet(url) val timeout = 60; // seconds - val config = RequestConfig.custom() + val config = RequestConfig + .custom() .setConnectTimeout(timeout * 1000) .setConnectionRequestTimeout(timeout * 1000) - .setSocketTimeout(timeout * 1000).build() + .setSocketTimeout(timeout * 1000) + .build() val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() try { var tries = 4 @@ -73,8 +81,7 @@ object SparkCreateBaselineDataFrame { println(s"get response with status${response.getStatusLine.getStatusCode}") if (response.getStatusLine.getStatusCode > 400) { tries -= 1 - } - else + } else return IOUtils.toString(response.getEntity.getContent) } catch { case e: Throwable => @@ -90,10 +97,8 @@ object SparkCreateBaselineDataFrame { } } - def downloadBaseLineUpdate(baselinePath: String, hdfsServerUri: String): Unit = { - val conf = new Configuration conf.set("fs.defaultFS", hdfsServerUri) val fs = FileSystem.get(conf) @@ -122,31 +127,36 @@ object SparkCreateBaselineDataFrame { } + val pmArticleAggregator: Aggregator[(String, PMArticle), PMArticle, PMArticle] = + new Aggregator[(String, PMArticle), PMArticle, PMArticle] with Serializable { + override def zero: PMArticle = new PMArticle - val pmArticleAggregator: Aggregator[(String, PMArticle), PMArticle, PMArticle] = new Aggregator[(String, PMArticle), PMArticle, PMArticle] with Serializable { - override def zero: PMArticle = new PMArticle + override def reduce(b: PMArticle, a: (String, PMArticle)): PMArticle = { + if (b != null && b.getPmid != null) b else a._2 + } - override def reduce(b: PMArticle, a: (String, PMArticle)): PMArticle = { - if (b != null && b.getPmid != null) b else a._2 + override def merge(b1: PMArticle, b2: PMArticle): PMArticle = { + if (b1 != null && b1.getPmid != null) b1 else b2 + + } + + override def finish(reduction: PMArticle): PMArticle = reduction + + override def bufferEncoder: Encoder[PMArticle] = Encoders.kryo[PMArticle] + + override def outputEncoder: Encoder[PMArticle] = Encoders.kryo[PMArticle] } - override def merge(b1: PMArticle, b2: PMArticle): PMArticle = { - if (b1 != null && b1.getPmid != null) b1 else b2 - - } - - override def finish(reduction: PMArticle): PMArticle = reduction - - override def bufferEncoder: Encoder[PMArticle] = Encoders.kryo[PMArticle] - - override def outputEncoder: Encoder[PMArticle] = Encoders.kryo[PMArticle] - } - - def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() val log: Logger = LoggerFactory.getLogger(getClass) - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkEBILinksToOaf.getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + SparkEBILinksToOaf.getClass.getResourceAsStream( + "/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json" + ) + ) + ) parser.parseArgument(args) val isLookupUrl: String = parser.get("isLookupUrl") log.info("isLookupUrl: {}", isLookupUrl) @@ -162,7 +172,6 @@ object SparkCreateBaselineDataFrame { val skipUpdate = parser.get("skipUpdate") log.info("skipUpdate: {}", skipUpdate) - val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) val spark: SparkSession = @@ -170,7 +179,8 @@ object SparkCreateBaselineDataFrame { .builder() .config(conf) .appName(SparkEBILinksToOaf.getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val sc = spark.sparkContext import spark.implicits._ @@ -183,20 +193,30 @@ object SparkCreateBaselineDataFrame { if (!"true".equalsIgnoreCase(skipUpdate)) { downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri) val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline", 2000) - val ds: Dataset[PMArticle] = spark.createDataset(k.filter(i => i._1.endsWith(".gz")).flatMap(i => { - val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes())) - new PMParser(xml) - })) - ds.map(p => (p.getPmid, p))(Encoders.tuple(Encoders.STRING, PMEncoder)).groupByKey(_._1) + val ds: Dataset[PMArticle] = spark.createDataset( + k.filter(i => i._1.endsWith(".gz")) + .flatMap(i => { + val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes())) + new PMParser(xml) + }) + ) + ds.map(p => (p.getPmid, p))(Encoders.tuple(Encoders.STRING, PMEncoder)) + .groupByKey(_._1) .agg(pmArticleAggregator.toColumn) - .map(p => p._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_dataset") + .map(p => p._2) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/baseline_dataset") } val exported_dataset = spark.read.load(s"$workingPath/baseline_dataset").as[PMArticle] - CollectionUtils.saveDataset(exported_dataset - .map(a => PubMedToOaf.convert(a, vocabularies)).as[Oaf] - .filter(p => p != null), - targetPath) + CollectionUtils.saveDataset( + exported_dataset + .map(a => PubMedToOaf.convert(a, vocabularies)) + .as[Oaf] + .filter(p => p != null), + targetPath + ) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala index 18e39387f..44e9e22ea 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala @@ -25,10 +25,12 @@ object SparkDownloadEBILinks { def requestPage(url: String): String = { val r = new HttpGet(url) val timeout = 60; // seconds - val config = RequestConfig.custom() + val config = RequestConfig + .custom() .setConnectTimeout(timeout * 1000) .setConnectionRequestTimeout(timeout * 1000) - .setSocketTimeout(timeout * 1000).build() + .setSocketTimeout(timeout * 1000) + .build() val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() try { var tries = 4 @@ -39,8 +41,7 @@ object SparkDownloadEBILinks { println(s"get response with status${response.getStatusLine.getStatusCode}") if (response.getStatusLine.getStatusCode > 400) { tries -= 1 - } - else + } else return IOUtils.toString(response.getEntity.getContent) } catch { case e: Throwable => @@ -66,14 +67,19 @@ object SparkDownloadEBILinks { val log: Logger = LoggerFactory.getLogger(getClass) val MAX_ITEM_PER_PARTITION = 20000 val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/ebi_download_update.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/ebi_download_update.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(SparkEBILinksToOaf.getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() import spark.implicits._ @@ -87,22 +93,40 @@ object SparkDownloadEBILinks { log.info(s"workingPath -> $workingPath") log.info("Getting max pubmedId where the links have already requested") - val links: Dataset[EBILinkItem] = spark.read.load(s"$sourcePath/ebi_links_dataset").as[EBILinkItem] + val links: Dataset[EBILinkItem] = + spark.read.load(s"$sourcePath/ebi_links_dataset").as[EBILinkItem] val lastPMIDRequested = links.map(l => l.id).select(max("value")).first.getLong(0) log.info("Retrieving PMID to request links") val pubmed = spark.read.load(s"$sourcePath/baseline_dataset").as[PMArticle] - pubmed.map(p => p.getPmid.toLong).where(s"value > $lastPMIDRequested").write.mode(SaveMode.Overwrite).save(s"$workingPath/id_to_request") + pubmed + .map(p => p.getPmid.toLong) + .where(s"value > $lastPMIDRequested") + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/id_to_request") val pmidToReq: Dataset[Long] = spark.read.load(s"$workingPath/id_to_request").as[Long] val total = pmidToReq.count() - spark.createDataset(pmidToReq.rdd.repartition((total / MAX_ITEM_PER_PARTITION).toInt).map(pmid => createEBILinks(pmid)).filter(l => l != null)).write.mode(SaveMode.Overwrite).save(s"$workingPath/links_update") + spark + .createDataset( + pmidToReq.rdd + .repartition((total / MAX_ITEM_PER_PARTITION).toInt) + .map(pmid => createEBILinks(pmid)) + .filter(l => l != null) + ) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/links_update") - val updates: Dataset[EBILinkItem] = spark.read.load(s"$workingPath/links_update").as[EBILinkItem] + val updates: Dataset[EBILinkItem] = + spark.read.load(s"$workingPath/links_update").as[EBILinkItem] - links.union(updates).groupByKey(_.id) + links + .union(updates) + .groupByKey(_.id) .reduceGroups { (x, y) => if (x == null || x.links == null) y @@ -112,6 +136,10 @@ object SparkDownloadEBILinks { x else y - }.map(_._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/links_final") + } + .map(_._2) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/links_final") } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala index cd03f004d..7cb6153ff 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala @@ -15,15 +15,19 @@ object SparkEBILinksToOaf { def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/ebi_to_df_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/ebi_to_df_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(SparkEBILinksToOaf.getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() import spark.implicits._ val sourcePath = parser.get("sourcePath") @@ -32,11 +36,17 @@ object SparkEBILinksToOaf { log.info(s"targetPath -> $targetPath") implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) - val ebLinks: Dataset[EBILinkItem] = spark.read.load(sourcePath).as[EBILinkItem].filter(l => l.links != null && l.links.startsWith("{")) + val ebLinks: Dataset[EBILinkItem] = spark.read + .load(sourcePath) + .as[EBILinkItem] + .filter(l => l.links != null && l.links.startsWith("{")) - CollectionUtils.saveDataset(ebLinks.flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) - .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) - .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)), - targetPath) + CollectionUtils.saveDataset( + ebLinks + .flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) + .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) + .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)), + targetPath + ) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala index c6d5fdf74..49a271641 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala @@ -3,16 +3,13 @@ package eu.dnetlib.dhp.sx.bio.pubmed import scala.xml.MetaData import scala.xml.pull.{EvElemEnd, EvElemStart, EvText, XMLEventReader} +/** @param xml + */ +class PMParser(xml: XMLEventReader) extends Iterator[PMArticle] { -/** - * - * @param xml - */ -class PMParser(xml:XMLEventReader) extends Iterator[PMArticle] { + var currentArticle: PMArticle = generateNextArticle() - var currentArticle:PMArticle = generateNextArticle() - - override def hasNext: Boolean = currentArticle!= null + override def hasNext: Boolean = currentArticle != null override def next(): PMArticle = { val tmp = currentArticle @@ -20,33 +17,30 @@ class PMParser(xml:XMLEventReader) extends Iterator[PMArticle] { tmp } - def extractAttributes(attrs:MetaData, key:String):String = { + def extractAttributes(attrs: MetaData, key: String): String = { val res = attrs.get(key) if (res.isDefined) { - val s =res.get + val s = res.get if (s != null && s.nonEmpty) s.head.text else null - } - else null + } else null } - - def validate_Date(year:String, month:String, day:String):String = { + def validate_Date(year: String, month: String, day: String): String = { try { f"${year.toInt}-${month.toInt}%02d-${day.toInt}%02d" } catch { - case _: Throwable =>null + case _: Throwable => null } } - def generateNextArticle():PMArticle = { + def generateNextArticle(): PMArticle = { - - var currentSubject:PMSubject = null + var currentSubject: PMSubject = null var currentAuthor: PMAuthor = null var currentJournal: PMJournal = null var currentGrant: PMGrant = null @@ -54,12 +48,7 @@ class PMParser(xml:XMLEventReader) extends Iterator[PMArticle] { var currentYear = "0" var currentMonth = "01" var currentDay = "01" - var currentArticleType:String = null - - - - - + var currentArticleType: String = null while (xml.hasNext) { xml.next match { @@ -68,64 +57,67 @@ class PMParser(xml:XMLEventReader) extends Iterator[PMArticle] { label match { case "PubmedArticle" => currentArticle = new PMArticle - case "Author" => currentAuthor = new PMAuthor - case "Journal" => currentJournal = new PMJournal - case "Grant" => currentGrant = new PMGrant + case "Author" => currentAuthor = new PMAuthor + case "Journal" => currentJournal = new PMJournal + case "Grant" => currentGrant = new PMGrant case "PublicationType" | "DescriptorName" => currentSubject = new PMSubject currentSubject.setMeshId(extractAttributes(attrs, "UI")) - case "ArticleId" => currentArticleType = extractAttributes(attrs,"IdType") - case _ => + case "ArticleId" => currentArticleType = extractAttributes(attrs, "IdType") + case _ => } case EvElemEnd(_, label) => label match { case "PubmedArticle" => return currentArticle - case "Author" => currentArticle.getAuthors.add(currentAuthor) - case "Journal" => currentArticle.setJournal(currentJournal) - case "Grant" => currentArticle.getGrants.add(currentGrant) - case "PubMedPubDate" => if (currentArticle.getDate== null) - currentArticle.setDate(validate_Date(currentYear,currentMonth,currentDay)) - case "PubDate" => currentJournal.setDate(s"$currentYear-$currentMonth-$currentDay") - case "DescriptorName" => currentArticle.getSubjects.add(currentSubject) - case "PublicationType" =>currentArticle.getPublicationTypes.add(currentSubject) - case _ => + case "Author" => currentArticle.getAuthors.add(currentAuthor) + case "Journal" => currentArticle.setJournal(currentJournal) + case "Grant" => currentArticle.getGrants.add(currentGrant) + case "PubMedPubDate" => + if (currentArticle.getDate == null) + currentArticle.setDate(validate_Date(currentYear, currentMonth, currentDay)) + case "PubDate" => currentJournal.setDate(s"$currentYear-$currentMonth-$currentDay") + case "DescriptorName" => currentArticle.getSubjects.add(currentSubject) + case "PublicationType" => currentArticle.getPublicationTypes.add(currentSubject) + case _ => } case EvText(text) => - if (currNode!= null && text.trim.nonEmpty) + if (currNode != null && text.trim.nonEmpty) currNode match { case "ArticleTitle" => { - if (currentArticle.getTitle==null) + if (currentArticle.getTitle == null) currentArticle.setTitle(text.trim) else currentArticle.setTitle(currentArticle.getTitle + text.trim) } case "AbstractText" => { - if (currentArticle.getDescription==null) + if (currentArticle.getDescription == null) currentArticle.setDescription(text.trim) else currentArticle.setDescription(currentArticle.getDescription + text.trim) } case "PMID" => currentArticle.setPmid(text.trim) - case "ArticleId" => if ("doi".equalsIgnoreCase(currentArticleType)) currentArticle.setDoi(text.trim) - case "Language" => currentArticle.setLanguage(text.trim) - case "ISSN" => currentJournal.setIssn(text.trim) - case "GrantID" => currentGrant.setGrantID(text.trim) - case "Agency" => currentGrant.setAgency(text.trim) - case "Country" => if (currentGrant != null) currentGrant.setCountry(text.trim) - case "Year" => currentYear = text.trim - case "Month" => currentMonth = text.trim - case "Day" => currentDay = text.trim - case "Volume" => currentJournal.setVolume( text.trim) - case "Issue" => currentJournal.setIssue (text.trim) + case "ArticleId" => + if ("doi".equalsIgnoreCase(currentArticleType)) currentArticle.setDoi(text.trim) + case "Language" => currentArticle.setLanguage(text.trim) + case "ISSN" => currentJournal.setIssn(text.trim) + case "GrantID" => currentGrant.setGrantID(text.trim) + case "Agency" => currentGrant.setAgency(text.trim) + case "Country" => if (currentGrant != null) currentGrant.setCountry(text.trim) + case "Year" => currentYear = text.trim + case "Month" => currentMonth = text.trim + case "Day" => currentDay = text.trim + case "Volume" => currentJournal.setVolume(text.trim) + case "Issue" => currentJournal.setIssue(text.trim) case "PublicationType" | "DescriptorName" => currentSubject.setValue(text.trim) case "LastName" => { if (currentAuthor != null) currentAuthor.setLastName(text.trim) } - case "ForeName" => if (currentAuthor != null) - currentAuthor.setForeName(text.trim) + case "ForeName" => + if (currentAuthor != null) + currentAuthor.setForeName(text.trim) case "Title" => - if (currentJournal.getTitle==null) + if (currentJournal.getTitle == null) currentJournal.setTitle(text.trim) else currentJournal.setTitle(currentJournal.getTitle + text.trim) @@ -139,8 +131,3 @@ class PMParser(xml:XMLEventReader) extends Iterator[PMArticle] { null } } - - - - - diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index 65717adff..857609368 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -2,38 +2,50 @@ package eu.dnetlib.dhp.sx.bio.pubmed import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} +import eu.dnetlib.dhp.schema.oaf.utils.{ + GraphCleaningFunctions, + IdentifierFactory, + OafMapperUtils, + PidType +} import eu.dnetlib.dhp.schema.oaf._ import collection.JavaConverters._ import java.util.regex.Pattern /** - * - */ + */ object PubMedToOaf { val SUBJ_CLASS = "keywords" + val urlMap = Map( "pmid" -> "https://pubmed.ncbi.nlm.nih.gov/", - "doi" -> "https://dx.doi.org/" + "doi" -> "https://dx.doi.org/" ) - val dataInfo: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9") - val collectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") + val dataInfo: DataInfo = OafMapperUtils.dataInfo( + false, + null, + false, + false, + ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, + "0.9" + ) - /** - * Cleaning the DOI Applying regex in order to - * remove doi starting with URL - * - * @param doi input DOI - * @return cleaned DOI - */ + val collectedFrom: KeyValue = + OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") + + /** Cleaning the DOI Applying regex in order to + * remove doi starting with URL + * + * @param doi input DOI + * @return cleaned DOI + */ def cleanDoi(doi: String): String = { val regex = "^10.\\d{4,9}\\/[\\[\\]\\-\\<\\>._;()\\/:A-Z0-9]+$" - val pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE) val matcher = pattern.matcher(doi) @@ -43,33 +55,34 @@ object PubMedToOaf { null } - /** - * - * Create an instance of class extends Result - * starting from OAF instanceType value - * - * @param cobjQualifier OAF instance type - * @param vocabularies All dnet vocabularies - * @return the correct instance - */ + /** Create an instance of class extends Result + * starting from OAF instanceType value + * + * @param cobjQualifier OAF instance type + * @param vocabularies All dnet vocabularies + * @return the correct instance + */ def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = { - val result_typologies = getVocabularyTerm(ModelConstants.DNET_RESULT_TYPOLOGIES, vocabularies, cobjQualifier.getClassid) + val result_typologies = getVocabularyTerm( + ModelConstants.DNET_RESULT_TYPOLOGIES, + vocabularies, + cobjQualifier.getClassid + ) result_typologies.getClassid match { - case "dataset" => new Dataset + case "dataset" => new Dataset case "publication" => new Publication - case "other" => new OtherResearchProduct - case "software" => new Software - case _ => null + case "other" => new OtherResearchProduct + case "software" => new Software + case _ => null } } - /** - * Mapping the Pubmedjournal info into the OAF Journale - * - * @param j the pubmedJournal - * @return the OAF Journal - */ + /** Mapping the Pubmedjournal info into the OAF Journale + * + * @param j the pubmedJournal + * @return the OAF Journal + */ def mapJournal(j: PMJournal): Journal = { if (j == null) return null @@ -83,40 +96,47 @@ object PubMedToOaf { journal.setIss(j.getIssue) journal - } - /** - * - * Find vocabulary term into synonyms and term in the vocabulary - * - * @param vocabularyName the input vocabulary name - * @param vocabularies all the vocabularies - * @param term the term to find - * @return the cleaned term value - */ - def getVocabularyTerm(vocabularyName: String, vocabularies: VocabularyGroup, term: String): Qualifier = { + /** Find vocabulary term into synonyms and term in the vocabulary + * + * @param vocabularyName the input vocabulary name + * @param vocabularies all the vocabularies + * @param term the term to find + * @return the cleaned term value + */ + def getVocabularyTerm( + vocabularyName: String, + vocabularies: VocabularyGroup, + term: String + ): Qualifier = { val a = vocabularies.getSynonymAsQualifier(vocabularyName, term) val b = vocabularies.getTermAsQualifier(vocabularyName, term) if (a == null) b else a } - - /** - * Map the Pubmed Article into the OAF instance - * - * @param article the pubmed articles - * @param vocabularies the vocabularies - * @return The OAF instance if the mapping did not fail - */ + /** Map the Pubmed Article into the OAF instance + * + * @param article the pubmed articles + * @param vocabularies the vocabularies + * @return The OAF instance if the mapping did not fail + */ def convert(article: PMArticle, vocabularies: VocabularyGroup): Oaf = { if (article.getPublicationTypes == null) return null - // MAP PMID into pid with classid = classname = pmid - val pidList: List[StructuredProperty] = List(OafMapperUtils.structuredProperty(article.getPmid, PidType.pmid.toString, PidType.pmid.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo)) + val pidList: List[StructuredProperty] = List( + OafMapperUtils.structuredProperty( + article.getPmid, + PidType.pmid.toString, + PidType.pmid.toString, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES, + dataInfo + ) + ) if (pidList == null) return null @@ -125,7 +145,14 @@ object PubMedToOaf { if (article.getDoi != null) { val normalizedPid = cleanDoi(article.getDoi) if (normalizedPid != null) - alternateIdentifier = OafMapperUtils.structuredProperty(normalizedPid, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo) + alternateIdentifier = OafMapperUtils.structuredProperty( + normalizedPid, + PidType.doi.toString, + PidType.doi.toString, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES, + dataInfo + ) } // INSTANCE MAPPING @@ -133,14 +160,18 @@ object PubMedToOaf { // If the article contains the typology Journal Article then we apply this type //else We have to find a terms that match the vocabulary otherwise we discard it - val ja = article.getPublicationTypes.asScala.find(s => "Journal Article".equalsIgnoreCase(s.getValue)) + val ja = + article.getPublicationTypes.asScala.find(s => "Journal Article".equalsIgnoreCase(s.getValue)) val pubmedInstance = new Instance if (ja.isDefined) { - val cojbCategory = getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue) + val cojbCategory = + getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue) pubmedInstance.setInstancetype(cojbCategory) } else { val i_type = article.getPublicationTypes.asScala - .map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)) + .map(s => + getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue) + ) .find(q => q != null) if (i_type.isDefined) pubmedInstance.setInstancetype(i_type.get) @@ -155,7 +186,9 @@ object PubMedToOaf { if (alternateIdentifier != null) pubmedInstance.setAlternateIdentifier(List(alternateIdentifier).asJava) result.setInstance(List(pubmedInstance).asJava) - pubmedInstance.getPid.asScala.filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)).map(p => p.getValue)(collection.breakOut) + pubmedInstance.getPid.asScala + .filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)) + .map(p => p.getValue)(collection.breakOut) //CREATE URL From pmid val urlLists: List[String] = pidList .map(s => (urlMap.getOrElse(s.getQualifier.getClassid, ""), s.getValue)) @@ -165,7 +198,9 @@ object PubMedToOaf { pubmedInstance.setUrl(urlLists.asJava) //ASSIGN DateofAcceptance - pubmedInstance.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)) + pubmedInstance.setDateofacceptance( + OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo) + ) //ASSIGN COLLECTEDFROM pubmedInstance.setCollectedfrom(collectedFrom) result.setPid(pidList.asJava) @@ -173,7 +208,6 @@ object PubMedToOaf { //END INSTANCE MAPPING //-------------------------------------------------------------------------------------- - // JOURNAL MAPPING //-------------------------------------------------------------------------------------- if (article.getJournal != null && result.isInstanceOf[Publication]) @@ -182,31 +216,48 @@ object PubMedToOaf { //END JOURNAL MAPPING //-------------------------------------------------------------------------------------- - // RESULT MAPPING //-------------------------------------------------------------------------------------- - result.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)) + result.setDateofacceptance( + OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo) + ) if (article.getTitle == null || article.getTitle.isEmpty) return null - result.setTitle(List(OafMapperUtils.structuredProperty(article.getTitle, ModelConstants.MAIN_TITLE_QUALIFIER, dataInfo)).asJava) + result.setTitle( + List( + OafMapperUtils.structuredProperty( + article.getTitle, + ModelConstants.MAIN_TITLE_QUALIFIER, + dataInfo + ) + ).asJava + ) if (article.getDescription != null && article.getDescription.nonEmpty) result.setDescription(List(OafMapperUtils.field(article.getDescription, dataInfo)).asJava) if (article.getLanguage != null) { - val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, article.getLanguage) + val term = + vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, article.getLanguage) if (term != null) result.setLanguage(term) } - - val subjects: List[StructuredProperty] = article.getSubjects.asScala.map(s => OafMapperUtils.structuredProperty(s.getValue, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, dataInfo))(collection.breakOut) + val subjects: List[StructuredProperty] = article.getSubjects.asScala.map(s => + OafMapperUtils.structuredProperty( + s.getValue, + SUBJ_CLASS, + SUBJ_CLASS, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + dataInfo + ) + )(collection.breakOut) if (subjects != null) result.setSubject(subjects.asJava) - val authors: List[Author] = article.getAuthors.asScala.zipWithIndex.map { case (a, index) => val author = new Author() author.setName(a.getForeName) @@ -216,15 +267,12 @@ object PubMedToOaf { author }(collection.breakOut) - if (authors != null && authors.nonEmpty) result.setAuthor(authors.asJava) result.setOriginalId(pidList.map(s => s.getValue).asJava) - result.setId(article.getPmid) - // END RESULT MAPPING //-------------------------------------------------------------------------------------- val id = IdentifierFactory.createIdentifier(result) @@ -234,5 +282,4 @@ object PubMedToOaf { result } - } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala index 45a6cfc89..2618d466a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala @@ -17,7 +17,8 @@ import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ import java.text.SimpleDateFormat -class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:Logger) extends AbstractScalaApplication(propertyPath, args, log:Logger) { +class SparkRetrieveDataciteDelta(propertyPath: String, args: Array[String], log: Logger) + extends AbstractScalaApplication(propertyPath, args, log: Logger) { val ISO_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ssZ" val simpleFormatter = new SimpleDateFormat(ISO_DATE_PATTERN) @@ -25,162 +26,190 @@ class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:L val SCHOLIX_RESOURCE_PATH_NAME = "scholixResource" val DATACITE_OAF_PATH_NAME = "dataciteOAFUpdate" val PID_MAP_PATH_NAME = "pidMap" - val RESOLVED_REL_PATH_NAME ="resolvedRelation" + val RESOLVED_REL_PATH_NAME = "resolvedRelation" val SCHOLIX_PATH_NAME = "scholix" + def scholixResourcePath(workingPath: String) = s"$workingPath/$SCHOLIX_RESOURCE_PATH_NAME" + def dataciteOAFPath(workingPath: String) = s"$workingPath/$DATACITE_OAF_PATH_NAME" + def pidMapPath(workingPath: String) = s"$workingPath/$PID_MAP_PATH_NAME" + def resolvedRelationPath(workingPath: String) = s"$workingPath/$RESOLVED_REL_PATH_NAME" + def scholixPath(workingPath: String) = s"$workingPath/$SCHOLIX_PATH_NAME" - def scholixResourcePath(workingPath:String) = s"$workingPath/$SCHOLIX_RESOURCE_PATH_NAME" - def dataciteOAFPath(workingPath:String) = s"$workingPath/$DATACITE_OAF_PATH_NAME" - def pidMapPath(workingPath:String) = s"$workingPath/$PID_MAP_PATH_NAME" - def resolvedRelationPath(workingPath:String) = s"$workingPath/$RESOLVED_REL_PATH_NAME" - def scholixPath(workingPath:String) = s"$workingPath/$SCHOLIX_PATH_NAME" - - - /** - * Utility to parse Date in ISO8601 to epochMillis - * @param inputDate The String represents an input date in ISO8601 - * @return The relative epochMillis of parsed date - */ - def ISO8601toEpochMillis(inputDate:String):Long = { + /** Utility to parse Date in ISO8601 to epochMillis + * @param inputDate The String represents an input date in ISO8601 + * @return The relative epochMillis of parsed date + */ + def ISO8601toEpochMillis(inputDate: String): Long = { simpleFormatter.parse(inputDate).getTime } - - /** - * This method tries to retrieve the last collection date from all datacite - * records in HDFS. - * This method should be called before indexing scholexplorer to retrieve - * the delta of Datacite record to download, since from the generation of - * raw graph to the generation of Scholexplorer sometimes it takes 20 days - * @param spark - * @param entitiesPath - * @return the last collection date from the current scholexplorer Graph of the datacite records - */ - def retrieveLastCollectedFrom(spark:SparkSession, entitiesPath:String):Long = { + /** This method tries to retrieve the last collection date from all datacite + * records in HDFS. + * This method should be called before indexing scholexplorer to retrieve + * the delta of Datacite record to download, since from the generation of + * raw graph to the generation of Scholexplorer sometimes it takes 20 days + * @param spark + * @param entitiesPath + * @return the last collection date from the current scholexplorer Graph of the datacite records + */ + def retrieveLastCollectedFrom(spark: SparkSession, entitiesPath: String): Long = { log.info("Retrieve last entities collected From") - implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result] + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resultEncoder: Encoder[Result] = Encoders.kryo[Result] import spark.implicits._ - val entitiesDS = spark.read.load(s"$entitiesPath/*").as[Oaf].filter(o =>o.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]) + val entitiesDS = spark.read + .load(s"$entitiesPath/*") + .as[Oaf] + .filter(o => o.isInstanceOf[Result]) + .map(r => r.asInstanceOf[Result]) - val date = entitiesDS.filter(r => r.getDateofcollection!= null).map(_.getDateofcollection).select(max("value")).first.getString(0) + val date = entitiesDS + .filter(r => r.getDateofcollection != null) + .map(_.getDateofcollection) + .select(max("value")) + .first + .getString(0) ISO8601toEpochMillis(date) / 1000 } - - /** - * The method of update Datacite relationships on Scholexplorer - * needs some utilities data structures - * One is the scholixResource DS that stores all the nodes in the Scholix Graph - * in format ScholixResource - * @param summaryPath the path of the summary in Scholix - * @param workingPath the working path - * @param spark the spark session - */ - def generateScholixResource(summaryPath:String, workingPath: String, spark:SparkSession) :Unit = { - implicit val summaryEncoder:Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] - implicit val scholixResourceEncoder:Encoder[ScholixResource] = Encoders.kryo[ScholixResource] + /** The method of update Datacite relationships on Scholexplorer + * needs some utilities data structures + * One is the scholixResource DS that stores all the nodes in the Scholix Graph + * in format ScholixResource + * @param summaryPath the path of the summary in Scholix + * @param workingPath the working path + * @param spark the spark session + */ + def generateScholixResource( + summaryPath: String, + workingPath: String, + spark: SparkSession + ): Unit = { + implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] + implicit val scholixResourceEncoder: Encoder[ScholixResource] = Encoders.kryo[ScholixResource] log.info("Convert All summary to ScholixResource") - spark.read.load(summaryPath).as[ScholixSummary] + spark.read + .load(summaryPath) + .as[ScholixSummary] .map(ScholixUtils.generateScholixResourceFromSummary)(scholixResourceEncoder) - .filter(r => r.getIdentifier!= null && r.getIdentifier.size>0) - .write.mode(SaveMode.Overwrite).save(s"${scholixResourcePath(workingPath)}_native") + .filter(r => r.getIdentifier != null && r.getIdentifier.size > 0) + .write + .mode(SaveMode.Overwrite) + .save(s"${scholixResourcePath(workingPath)}_native") } - /** - * This method convert the new Datacite Resource into Scholix Resource - * Needed to fill the source and the type of Scholix Relationships - * @param workingPath the Working Path - * @param spark The spark Session - */ - def addMissingScholixResource(workingPath:String, spark:SparkSession ) :Unit = { - implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val scholixResourceEncoder:Encoder[ScholixResource] = Encoders.kryo[ScholixResource] - implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result] + /** This method convert the new Datacite Resource into Scholix Resource + * Needed to fill the source and the type of Scholix Relationships + * @param workingPath the Working Path + * @param spark The spark Session + */ + def addMissingScholixResource(workingPath: String, spark: SparkSession): Unit = { + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val scholixResourceEncoder: Encoder[ScholixResource] = Encoders.kryo[ScholixResource] + implicit val resultEncoder: Encoder[Result] = Encoders.kryo[Result] import spark.implicits._ - spark.read.load(dataciteOAFPath(workingPath)).as[Oaf] + spark.read + .load(dataciteOAFPath(workingPath)) + .as[Oaf] .filter(_.isInstanceOf[Result]) .map(_.asInstanceOf[Result]) .map(ScholixUtils.generateScholixResourceFromResult) - .filter(r => r.getIdentifier!= null && r.getIdentifier.size>0) - .write.mode(SaveMode.Overwrite).save(s"${scholixResourcePath(workingPath)}_update") + .filter(r => r.getIdentifier != null && r.getIdentifier.size > 0) + .write + .mode(SaveMode.Overwrite) + .save(s"${scholixResourcePath(workingPath)}_update") val update = spark.read.load(s"${scholixResourcePath(workingPath)}_update").as[ScholixResource] val native = spark.read.load(s"${scholixResourcePath(workingPath)}_native").as[ScholixResource] - val graph = update.union(native) + val graph = update + .union(native) .groupByKey(_.getDnetIdentifier) - .reduceGroups((a,b) => if (a!= null && a.getDnetIdentifier!= null) a else b) + .reduceGroups((a, b) => if (a != null && a.getDnetIdentifier != null) a else b) .map(_._2) graph.write.mode(SaveMode.Overwrite).save(s"${scholixResourcePath(workingPath)}_graph") } + /** This method get and Transform only datacite records with + * timestamp greater than timestamp + * @param datacitePath the datacite input Path + * @param timestamp the timestamp + * @param workingPath the working path where save the generated Dataset + * @param spark SparkSession + * @param vocabularies Vocabularies needed for transformation + */ - /** - * This method get and Transform only datacite records with - * timestamp greater than timestamp - * @param datacitePath the datacite input Path - * @param timestamp the timestamp - * @param workingPath the working path where save the generated Dataset - * @param spark SparkSession - * @param vocabularies Vocabularies needed for transformation - */ - - def getDataciteUpdate(datacitePath:String, timestamp:Long, workingPath:String, spark:SparkSession,vocabularies: VocabularyGroup): Long = { + def getDataciteUpdate( + datacitePath: String, + timestamp: Long, + workingPath: String, + spark: SparkSession, + vocabularies: VocabularyGroup + ): Long = { import spark.implicits._ val ds = spark.read.load(datacitePath).as[DataciteType] - implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] - val total = ds.filter(_.timestamp>=timestamp).count() - if (total >0) { + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + val total = ds.filter(_.timestamp >= timestamp).count() + if (total > 0) { ds.filter(_.timestamp >= timestamp) - .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks = true)) - .flatMap(i => fixRelations(i)).filter(i => i != null) - .write.mode(SaveMode.Overwrite).save(dataciteOAFPath(workingPath)) + .flatMap(d => + DataciteToOAFTransformation + .generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks = true) + ) + .flatMap(i => fixRelations(i)) + .filter(i => i != null) + .write + .mode(SaveMode.Overwrite) + .save(dataciteOAFPath(workingPath)) } total } - /** - * After added the new ScholixResource, we need to update the scholix Pid Map - * to intersected with the new Datacite Relations - - * @param workingPath The working Path starting from save the new Map - * @param spark the spark session - */ - def generatePidMap(workingPath:String, spark:SparkSession ) :Unit = { - implicit val scholixResourceEncoder:Encoder[ScholixResource] = Encoders.kryo[ScholixResource] + /** After added the new ScholixResource, we need to update the scholix Pid Map + * to intersected with the new Datacite Relations + * + * @param workingPath The working Path starting from save the new Map + * @param spark the spark session + */ + def generatePidMap(workingPath: String, spark: SparkSession): Unit = { + implicit val scholixResourceEncoder: Encoder[ScholixResource] = Encoders.kryo[ScholixResource] import spark.implicits._ - spark.read.load(s"${scholixResourcePath(workingPath)}_graph").as[ScholixResource] - .flatMap(r=> - r.getIdentifier.asScala - .map(i =>DHPUtils.generateUnresolvedIdentifier(i.getIdentifier, i.getSchema)) - .map(t =>(t, r.getDnetIdentifier)) - )(Encoders.tuple(Encoders.STRING, Encoders.STRING)) + spark.read + .load(s"${scholixResourcePath(workingPath)}_graph") + .as[ScholixResource] + .flatMap(r => + r.getIdentifier.asScala + .map(i => DHPUtils.generateUnresolvedIdentifier(i.getIdentifier, i.getSchema)) + .map(t => (t, r.getDnetIdentifier)) + )(Encoders.tuple(Encoders.STRING, Encoders.STRING)) .groupByKey(_._1) - .reduceGroups((a,b) => if (a!= null && a._2!= null) a else b) + .reduceGroups((a, b) => if (a != null && a._2 != null) a else b) .map(_._2)(Encoders.tuple(Encoders.STRING, Encoders.STRING)) - .write.mode(SaveMode.Overwrite).save(pidMapPath(workingPath)) + .write + .mode(SaveMode.Overwrite) + .save(pidMapPath(workingPath)) } - /** - * This method resolve the datacite relation and filter the resolved - * relation - * @param workingPath the working path - * @param spark the spark session - */ + /** This method resolve the datacite relation and filter the resolved + * relation + * @param workingPath the working path + * @param spark the spark session + */ - def resolveUpdateRelation(workingPath:String, spark:SparkSession) :Unit = { - implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val relationEncoder:Encoder[Relation] = Encoders.kryo[Relation] + def resolveUpdateRelation(workingPath: String, spark: SparkSession): Unit = { + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val relationEncoder: Encoder[Relation] = Encoders.kryo[Relation] import spark.implicits._ - val pidMap = spark.read.load(pidMapPath(workingPath)).as[(String,String)] + val pidMap = spark.read.load(pidMapPath(workingPath)).as[(String, String)] - val unresolvedRelations:Dataset[(String,Relation)] = spark.read.load(dataciteOAFPath(workingPath)).as[Oaf] + val unresolvedRelations: Dataset[(String, Relation)] = spark.read + .load(dataciteOAFPath(workingPath)) + .as[Oaf] .filter(_.isInstanceOf[Relation]) .map(_.asInstanceOf[Relation]) .map { r => @@ -193,7 +222,7 @@ class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:L unresolvedRelations .joinWith(pidMap, unresolvedRelations("_1").equalTo(pidMap("_1"))) .map(t => { - val r =t._1._2 + val r = t._1._2 val resolvedIdentifier = t._2._2 if (r.getSource.startsWith("unresolved")) r.setSource(resolvedIdentifier) @@ -201,56 +230,62 @@ class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:L r.setTarget(resolvedIdentifier) r })(relationEncoder) - .filter(r => !(r.getSource.startsWith("unresolved") || r.getTarget.startsWith("unresolved") )) - .write.mode(SaveMode.Overwrite) + .filter(r => !(r.getSource.startsWith("unresolved") || r.getTarget.startsWith("unresolved"))) + .write + .mode(SaveMode.Overwrite) .save(resolvedRelationPath(workingPath)) } + /** This method generate scholix starting from resolved relation + * + * @param workingPath + * @param spark + */ + def generateScholixUpdate(workingPath: String, spark: SparkSession): Unit = { + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] + implicit val scholixResourceEncoder: Encoder[ScholixResource] = Encoders.kryo[ScholixResource] + implicit val relationEncoder: Encoder[Relation] = Encoders.kryo[Relation] + implicit val intermediateEncoder: Encoder[(String, Scholix)] = + Encoders.tuple(Encoders.STRING, scholixEncoder) + val relations: Dataset[(String, Relation)] = spark.read + .load(resolvedRelationPath(workingPath)) + .as[Relation] + .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relationEncoder)) - /** - * This method generate scholix starting from resolved relation - * - * - * @param workingPath - * @param spark - */ - def generateScholixUpdate(workingPath:String, spark:SparkSession) :Unit = { - implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val scholixEncoder:Encoder[Scholix] = Encoders.kryo[Scholix] - implicit val scholixResourceEncoder:Encoder[ScholixResource] = Encoders.kryo[ScholixResource] - implicit val relationEncoder:Encoder[Relation] = Encoders.kryo[Relation] - implicit val intermediateEncoder :Encoder[(String,Scholix)] = Encoders.tuple(Encoders.STRING, scholixEncoder) - - - val relations:Dataset[(String, Relation)] = spark.read.load(resolvedRelationPath(workingPath)).as[Relation].map(r =>(r.getSource,r))(Encoders.tuple(Encoders.STRING, relationEncoder)) - - val id_summary:Dataset[(String,ScholixResource)] = spark.read.load(s"${scholixResourcePath(workingPath)}_graph").as[ScholixResource].map(r => (r.getDnetIdentifier,r))(Encoders.tuple(Encoders.STRING, scholixResourceEncoder)) + val id_summary: Dataset[(String, ScholixResource)] = spark.read + .load(s"${scholixResourcePath(workingPath)}_graph") + .as[ScholixResource] + .map(r => (r.getDnetIdentifier, r))(Encoders.tuple(Encoders.STRING, scholixResourceEncoder)) id_summary.cache() - relations.joinWith(id_summary, relations("_1").equalTo(id_summary("_1")),"inner") - .map(t => (t._1._2.getTarget,ScholixUtils.scholixFromSource(t._1._2, t._2._2))) - .write.mode(SaveMode.Overwrite).save(s"$workingPath/scholix_one_verse") + relations + .joinWith(id_summary, relations("_1").equalTo(id_summary("_1")), "inner") + .map(t => (t._1._2.getTarget, ScholixUtils.scholixFromSource(t._1._2, t._2._2))) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/scholix_one_verse") - val source_scholix:Dataset[(String, Scholix)] =spark.read.load(s"$workingPath/scholix_one_verse").as[(String,Scholix)] + val source_scholix: Dataset[(String, Scholix)] = + spark.read.load(s"$workingPath/scholix_one_verse").as[(String, Scholix)] - source_scholix.joinWith(id_summary, source_scholix("_1").equalTo(id_summary("_1")),"inner") + source_scholix + .joinWith(id_summary, source_scholix("_1").equalTo(id_summary("_1")), "inner") .map(t => { - val target:ScholixResource =t._2._2 - val scholix:Scholix = t._1._2 - ScholixUtils.generateCompleteScholix(scholix,target) - })(scholixEncoder).write.mode(SaveMode.Overwrite).save(s"$workingPath/scholix") + val target: ScholixResource = t._2._2 + val scholix: Scholix = t._1._2 + ScholixUtils.generateCompleteScholix(scholix, target) + })(scholixEncoder) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/scholix") } - - - - - /** - * Here all the spark applications runs this method - * where the whole logic of the spark node is defined - */ + /** Here all the spark applications runs this method + * where the whole logic of the spark node is defined + */ override def run(): Unit = { val sourcePath = parser.get("sourcePath") log.info(s"SourcePath is '$sourcePath'") @@ -258,7 +293,7 @@ class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:L val datacitePath = parser.get("datacitePath") log.info(s"DatacitePath is '$datacitePath'") - val workingPath = parser.get("workingSupportPath") + val workingPath = parser.get("workingSupportPath") log.info(s"workingPath is '$workingPath'") val isLookupUrl: String = parser.get("isLookupUrl") @@ -268,38 +303,43 @@ class SparkRetrieveDataciteDelta (propertyPath:String, args:Array[String], log:L val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) require(vocabularies != null) - - val updateDS:Boolean = "true".equalsIgnoreCase(parser.get("updateDS")) + val updateDS: Boolean = "true".equalsIgnoreCase(parser.get("updateDS")) log.info(s"updateDS is '$updateDS'") var lastCollectionDate = 0L if (updateDS) { generateScholixResource(s"$sourcePath/provision/summaries", workingPath, spark) log.info("Retrieve last entities collected From starting from scholix Graph") - lastCollectionDate = retrieveLastCollectedFrom(spark, s"$sourcePath/entities") - } - else { + lastCollectionDate = retrieveLastCollectedFrom(spark, s"$sourcePath/entities") + } else { val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) fs.delete(new Path(s"${scholixResourcePath(workingPath)}_native"), true) - fs.rename(new Path(s"${scholixResourcePath(workingPath)}_graph"), new Path(s"${scholixResourcePath(workingPath)}_native")) - lastCollectionDate = retrieveLastCollectedFrom(spark, dataciteOAFPath(workingPath)) + fs.rename( + new Path(s"${scholixResourcePath(workingPath)}_graph"), + new Path(s"${scholixResourcePath(workingPath)}_native") + ) + lastCollectionDate = retrieveLastCollectedFrom(spark, dataciteOAFPath(workingPath)) } - val numRecords = getDataciteUpdate(datacitePath, lastCollectionDate, workingPath, spark, vocabularies) - if (numRecords>0) { - addMissingScholixResource(workingPath,spark) + val numRecords = + getDataciteUpdate(datacitePath, lastCollectionDate, workingPath, spark, vocabularies) + if (numRecords > 0) { + addMissingScholixResource(workingPath, spark) generatePidMap(workingPath, spark) - resolveUpdateRelation(workingPath,spark) + resolveUpdateRelation(workingPath, spark) generateScholixUpdate(workingPath, spark) } } } - object SparkRetrieveDataciteDelta { val log: Logger = LoggerFactory.getLogger(SparkRetrieveDataciteDelta.getClass) def main(args: Array[String]): Unit = { - new SparkRetrieveDataciteDelta("/eu/dnetlib/dhp/sx/graph/retrieve_datacite_delta_params.json", args, log).initialize().run() + new SparkRetrieveDataciteDelta( + "/eu/dnetlib/dhp/sx/graph/retrieve_datacite_delta_params.json", + args, + log + ).initialize().run() } } diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala index 5bb6ba67d..ca1dbc665 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala @@ -1,6 +1,5 @@ package eu.dnetlib.dhp.datacite - import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest import eu.dnetlib.dhp.schema.oaf.Oaf @@ -20,95 +19,90 @@ import java.util.Locale import scala.io.Source @ExtendWith(Array(classOf[MockitoExtension])) -class DataciteToOAFTest extends AbstractVocabularyTest{ +class DataciteToOAFTest extends AbstractVocabularyTest { - private var workingDir:Path = null + private var workingDir: Path = null val log: Logger = LoggerFactory.getLogger(getClass) @BeforeEach - def setUp() :Unit = { + def setUp(): Unit = { - workingDir= Files.createTempDirectory(getClass.getSimpleName) + workingDir = Files.createTempDirectory(getClass.getSimpleName) super.setUpVocabulary() } @AfterEach - def tearDown() :Unit = { + def tearDown(): Unit = { FileUtils.deleteDirectory(workingDir.toFile) } - @Test - def testDateMapping:Unit = { + def testDateMapping: Unit = { val inputDate = "2021-07-14T11:52:54+0000" val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) val dt = ISO8601FORMAT.parse(inputDate) println(dt.getTime) - } - @Test def testConvert(): Unit = { - val path = getClass.getResource("/eu/dnetlib/dhp/actionmanager/datacite/dataset").getPath val conf = new SparkConf() - val spark:SparkSession = SparkSession.builder().config(conf) + val spark: SparkSession = SparkSession + .builder() + .config(conf) .appName(getClass.getSimpleName) .master("local[*]") .getOrCreate() - - - implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] val instance = new GenerateDataciteDatasetSpark(null, null, log) val targetPath = s"$workingDir/result" - instance.generateDataciteDataset(path, exportLinks = true, vocabularies,targetPath, spark) + instance.generateDataciteDataset(path, exportLinks = true, vocabularies, targetPath, spark) import spark.implicits._ - val nativeSize =spark.read.load(path).count() - + val nativeSize = spark.read.load(path).count() assertEquals(100, nativeSize) - val result:Dataset[Oaf] = spark.read.load(targetPath).as[Oaf] + val result: Dataset[Oaf] = spark.read.load(targetPath).as[Oaf] - - result.map(s => s.getClass.getSimpleName).groupBy(col("value").alias("class")).agg(count("value").alias("Total")).show(false) + result + .map(s => s.getClass.getSimpleName) + .groupBy(col("value").alias("class")) + .agg(count("value").alias("Total")) + .show(false) val t = spark.read.load(targetPath).count() - assertTrue(t >0) - + assertTrue(t > 0) spark.stop() - - - } - @Test - def testMapping() :Unit = { - val record =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/record.json")).mkString + def testMapping(): Unit = { + val record = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/record.json") + ) + .mkString val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT) - val res:List[Oaf] =DataciteToOAFTransformation.generateOAF(record, 0L,0L, vocabularies, true ) + val res: List[Oaf] = DataciteToOAFTransformation.generateOAF(record, 0L, 0L, vocabularies, true) res.foreach(r => { - println (mapper.writeValueAsString(r)) + println(mapper.writeValueAsString(r)) println("----------------------------") }) - - } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala index 893a6e628..ea742a04a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala @@ -20,14 +20,13 @@ import scala.io.Source import scala.xml.pull.XMLEventReader @ExtendWith(Array(classOf[MockitoExtension])) -class BioScholixTest extends AbstractVocabularyTest{ - +class BioScholixTest extends AbstractVocabularyTest { val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT) - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false) + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) @BeforeEach - def setUp() :Unit = { + def setUp(): Unit = { super.setUpVocabulary() } @@ -38,52 +37,54 @@ class BioScholixTest extends AbstractVocabularyTest{ } object GzFileIterator { + def apply(is: InputStream, encoding: String) = { new BufferedReaderIterator( - new BufferedReader( - new InputStreamReader( - new GZIPInputStream( - is), encoding))) + new BufferedReader(new InputStreamReader(new GZIPInputStream(is), encoding)) + ) } } - - - @Test def testEBIData() = { - val inputXML = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml")).mkString + val inputXML = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml")) + .mkString val xml = new XMLEventReader(Source.fromBytes(inputXML.getBytes())) - new PMParser(xml).foreach(s =>println(mapper.writeValueAsString(s))) + new PMParser(xml).foreach(s => println(mapper.writeValueAsString(s))) } - @Test def testPubmedToOaf(): Unit = { assertNotNull(vocabularies) assertTrue(vocabularies.vocabularyExists("dnet:publication_resource")) - val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed_dump")).mkString - val r:List[Oaf] = records.lines.toList.map(s=>mapper.readValue(s, classOf[PMArticle])).map(a => PubMedToOaf.convert(a, vocabularies)) + val records: String = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed_dump")) + .mkString + val r: List[Oaf] = records.lines.toList + .map(s => mapper.readValue(s, classOf[PMArticle])) + .map(a => PubMedToOaf.convert(a, vocabularies)) assertEquals(10, r.size) - assertTrue(r.map(p => p.asInstanceOf[Result]).flatMap(p => p.getInstance().asScala.map(i => i.getInstancetype.getClassid)).exists(p => "0037".equalsIgnoreCase(p))) + assertTrue( + r.map(p => p.asInstanceOf[Result]) + .flatMap(p => p.getInstance().asScala.map(i => i.getInstancetype.getClassid)) + .exists(p => "0037".equalsIgnoreCase(p)) + ) println(mapper.writeValueAsString(r.head)) - - } - @Test - def testPDBToOAF():Unit = { + def testPDBToOAF(): Unit = { assertNotNull(vocabularies) assertTrue(vocabularies.vocabularyExists("dnet:publication_resource")) - val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pdb_dump")).mkString + val records: String = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pdb_dump")) + .mkString records.lines.foreach(s => assertTrue(s.nonEmpty)) - val result:List[Oaf]= records.lines.toList.flatMap(o => BioDBToOAF.pdbTOOaf(o)) - - + val result: List[Oaf] = records.lines.toList.flatMap(o => BioDBToOAF.pdbTOOaf(o)) assertTrue(result.nonEmpty) result.foreach(r => assertNotNull(r)) @@ -93,19 +94,18 @@ class BioScholixTest extends AbstractVocabularyTest{ } - @Test - def testUNIprotToOAF():Unit = { + def testUNIprotToOAF(): Unit = { assertNotNull(vocabularies) assertTrue(vocabularies.vocabularyExists("dnet:publication_resource")) - val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump")).mkString + val records: String = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump")) + .mkString records.lines.foreach(s => assertTrue(s.nonEmpty)) - val result:List[Oaf]= records.lines.toList.flatMap(o => BioDBToOAF.uniprotToOAF(o)) - - + val result: List[Oaf] = records.lines.toList.flatMap(o => BioDBToOAF.uniprotToOAF(o)) assertTrue(result.nonEmpty) result.foreach(r => assertNotNull(r)) @@ -115,35 +115,42 @@ class BioScholixTest extends AbstractVocabularyTest{ } - case class EBILinks(relType:String, date:String, title:String, pmid:String, targetPid:String, targetPidType:String) {} + case class EBILinks( + relType: String, + date: String, + title: String, + pmid: String, + targetPid: String, + targetPidType: String + ) {} - def parse_ebi_links(input:String):List[EBILinks] ={ + def parse_ebi_links(input: String): List[EBILinks] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json = parse(input) - val pmid = (json \ "publication" \"pmid").extract[String] + val pmid = (json \ "publication" \ "pmid").extract[String] for { - JObject(link) <- json \\ "Link" - JField("Target",JObject(target)) <- link - JField("RelationshipType",JObject(relType)) <- link - JField("Name", JString(relation)) <- relType - JField("PublicationDate",JString(publicationDate)) <- link - JField("Title", JString(title)) <- target - JField("Identifier",JObject(identifier)) <- target - JField("IDScheme", JString(idScheme)) <- identifier - JField("ID", JString(id)) <- identifier + JObject(link) <- json \\ "Link" + JField("Target", JObject(target)) <- link + JField("RelationshipType", JObject(relType)) <- link + JField("Name", JString(relation)) <- relType + JField("PublicationDate", JString(publicationDate)) <- link + JField("Title", JString(title)) <- target + JField("Identifier", JObject(identifier)) <- target + JField("IDScheme", JString(idScheme)) <- identifier + JField("ID", JString(id)) <- identifier } yield EBILinks(relation, publicationDate, title, pmid, id, idScheme) } - @Test - def testCrossrefLinksToOAF():Unit = { + def testCrossrefLinksToOAF(): Unit = { - val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/crossref_links")).mkString + val records: String = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/crossref_links")) + .mkString records.lines.foreach(s => assertTrue(s.nonEmpty)) - - val result:List[Oaf] =records.lines.map(s => BioDBToOAF.crossrefLinksToOaf(s)).toList + val result: List[Oaf] = records.lines.map(s => BioDBToOAF.crossrefLinksToOaf(s)).toList assertNotNull(result) assertTrue(result.nonEmpty) @@ -153,36 +160,41 @@ class BioScholixTest extends AbstractVocabularyTest{ } @Test - def testEBILinksToOAF():Unit = { - val iterator = GzFileIterator(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/ebi_links.gz"), "UTF-8") + def testEBILinksToOAF(): Unit = { + val iterator = GzFileIterator( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/ebi_links.gz"), + "UTF-8" + ) val data = iterator.next() - val res = BioDBToOAF.parse_ebi_links(BioDBToOAF.extractEBILinksFromDump(data).links).filter(BioDBToOAF.EBITargetLinksFilter).flatMap(BioDBToOAF.convertEBILinksToOaf) + val res = BioDBToOAF + .parse_ebi_links(BioDBToOAF.extractEBILinksFromDump(data).links) + .filter(BioDBToOAF.EBITargetLinksFilter) + .flatMap(BioDBToOAF.convertEBILinksToOaf) print(res.length) - println(mapper.writeValueAsString(res.head)) } - - - @Test - def scholixResolvedToOAF():Unit ={ + def scholixResolvedToOAF(): Unit = { - val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/scholix_resolved")).mkString + val records: String = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/scholix_resolved") + ) + .mkString records.lines.foreach(s => assertTrue(s.nonEmpty)) implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - val l:List[ScholixResolved] = records.lines.map{input => + val l: List[ScholixResolved] = records.lines.map { input => lazy val json = parse(input) json.extract[ScholixResolved] }.toList - - val result:List[Oaf] = l.map(s => BioDBToOAF.scholixResolvedToOAF(s)) + val result: List[Oaf] = l.map(s => BioDBToOAF.scholixResolvedToOAF(s)) assertTrue(result.nonEmpty) } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 3822f40b5..20471973a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -16,10 +16,22 @@ import java.time.LocalDate import java.time.format.DateTimeFormatter import scala.collection.JavaConverters._ +case class HostedByItemType( + id: String, + officialname: String, + issn: String, + eissn: String, + lissn: String, + openAccess: Boolean +) {} -case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} - -case class DoiBoostAffiliation(PaperId:Long, AffiliationId:Long, GridId:Option[String], OfficialPage:Option[String], DisplayName:Option[String]){} +case class DoiBoostAffiliation( + PaperId: Long, + AffiliationId: Long, + GridId: Option[String], + OfficialPage: Option[String], + DisplayName: Option[String] +) {} object DoiBoostMappingUtil { @@ -43,9 +55,19 @@ object DoiBoostMappingUtil { val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)" val DOI_PREFIX = "10." - val invalidName = List(",", "none none", "none, none", "none &na;", "(:null)", "test test test", "test test", "test", "&na; &na;") + val invalidName = List( + ",", + "none none", + "none, none", + "none &na;", + "(:null)", + "test test test", + "test test", + "test", + "&na; &na;" + ) - def toActionSet(item:Oaf) :(String, String) = { + def toActionSet(item: Oaf): (String, String) = { val mapper = new ObjectMapper() item match { @@ -75,59 +97,56 @@ object DoiBoostMappingUtil { } - - def toHostedByItem(input:String): (String, HostedByItemType) = { + def toHostedByItem(input: String): (String, HostedByItemType) = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]] + val c: Map[String, HostedByItemType] = json.extract[Map[String, HostedByItemType]] (c.keys.head, c.values.head) } - - def toISSNPair(publication: Publication) : (String, Publication) = { + def toISSNPair(publication: Publication): (String, Publication) = { val issn = if (publication.getJournal == null) null else publication.getJournal.getIssnPrinted - val eissn =if (publication.getJournal == null) null else publication.getJournal.getIssnOnline - val lissn =if (publication.getJournal == null) null else publication.getJournal.getIssnLinking + val eissn = if (publication.getJournal == null) null else publication.getJournal.getIssnOnline + val lissn = if (publication.getJournal == null) null else publication.getJournal.getIssnLinking - if (issn!= null && issn.nonEmpty) + if (issn != null && issn.nonEmpty) (issn, publication) - else if(eissn!= null && eissn.nonEmpty) + else if (eissn != null && eissn.nonEmpty) (eissn, publication) - else if(lissn!= null && lissn.nonEmpty) + else if (lissn != null && lissn.nonEmpty) (lissn, publication) else (publication.getId, publication) } - - - - def generateGridAffiliationId(gridId:String) :String = { + def generateGridAffiliationId(gridId: String): String = { s"20|grid________::${DHPUtils.md5(gridId.toLowerCase().trim())}" } - - def fixResult(result: Dataset) :Dataset = { + def fixResult(result: Dataset): Dataset = { val instanceType = extractInstance(result) if (instanceType.isDefined) { result.getInstance().asScala.foreach(i => i.setInstancetype(instanceType.get.getInstancetype)) } - result.getInstance().asScala.foreach(i => { - i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY) - }) + result + .getInstance() + .asScala + .foreach(i => { + i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY) + }) result } - - def decideAccessRight(lic : Field[String], date:String) : AccessRight = { - if(lic == null){ + def decideAccessRight(lic: Field[String], date: String): AccessRight = { + if (lic == null) { //Default value Unknown return getUnknownQualifier() } - val license : String = lic.getValue + val license: String = lic.getValue //CC licenses - if(license.startsWith("cc") || + if ( + license.startsWith("cc") || license.startsWith("http://creativecommons.org/licenses") || license.startsWith("https://creativecommons.org/licenses") || @@ -137,40 +156,44 @@ object DoiBoostMappingUtil { license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") || //APA (considered OPEN also by Unpaywall) - license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")){ + license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx") + ) { - val oaq : AccessRight = getOpenAccessQualifier() + val oaq: AccessRight = getOpenAccessQualifier() oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) return oaq } //OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED) - if(license.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model")){ + if ( + license.equals( + "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model" + ) + ) { val now = java.time.LocalDate.now - try{ + try { val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd")) - if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ - val oaq : AccessRight = getOpenAccessQualifier() + if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) { + val oaq: AccessRight = getOpenAccessQualifier() oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) return oaq - } - else{ + } else { return getEmbargoedAccessQualifier() } - }catch { + } catch { case e: Exception => { - try{ - val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")) - if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ - val oaq : AccessRight = getOpenAccessQualifier() - oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) - return oaq - } - else{ - return getEmbargoedAccessQualifier() - } - }catch{ + try { + val pub_date = + LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")) + if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) { + val oaq: AccessRight = getOpenAccessQualifier() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } else { + return getEmbargoedAccessQualifier() + } + } catch { case ex: Exception => return getClosedAccessQualifier() } } @@ -183,64 +206,91 @@ object DoiBoostMappingUtil { } + def getOpenAccessQualifier(): AccessRight = { - - def getOpenAccessQualifier():AccessRight = { - - OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN,"Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + OafMapperUtils.accessRight( + ModelConstants.ACCESS_RIGHT_OPEN, + "Open Access", + ModelConstants.DNET_ACCESS_MODES, + ModelConstants.DNET_ACCESS_MODES + ) } - def getRestrictedQualifier():AccessRight = { - OafMapperUtils.accessRight( "RESTRICTED","Restricted",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + def getRestrictedQualifier(): AccessRight = { + OafMapperUtils.accessRight( + "RESTRICTED", + "Restricted", + ModelConstants.DNET_ACCESS_MODES, + ModelConstants.DNET_ACCESS_MODES + ) } - - def getUnknownQualifier():AccessRight = { - OafMapperUtils.accessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + def getUnknownQualifier(): AccessRight = { + OafMapperUtils.accessRight( + ModelConstants.UNKNOWN, + ModelConstants.NOT_AVAILABLE, + ModelConstants.DNET_ACCESS_MODES, + ModelConstants.DNET_ACCESS_MODES + ) } - - def getEmbargoedAccessQualifier():AccessRight = { - OafMapperUtils.accessRight("EMBARGO","Embargo",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + def getEmbargoedAccessQualifier(): AccessRight = { + OafMapperUtils.accessRight( + "EMBARGO", + "Embargo", + ModelConstants.DNET_ACCESS_MODES, + ModelConstants.DNET_ACCESS_MODES + ) } - def getClosedAccessQualifier():AccessRight = { - OafMapperUtils.accessRight("CLOSED","Closed Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + def getClosedAccessQualifier(): AccessRight = { + OafMapperUtils.accessRight( + "CLOSED", + "Closed Access", + ModelConstants.DNET_ACCESS_MODES, + ModelConstants.DNET_ACCESS_MODES + ) } - - def extractInstance(r:Result):Option[Instance] = { - r.getInstance().asScala.find(i => i.getInstancetype != null && i.getInstancetype.getClassid.nonEmpty) + def extractInstance(r: Result): Option[Instance] = { + r.getInstance() + .asScala + .find(i => i.getInstancetype != null && i.getInstancetype.getClassid.nonEmpty) } - def fixPublication(input:((String,Publication), (String,HostedByItemType))): Publication = { + def fixPublication(input: ((String, Publication), (String, HostedByItemType))): Publication = { val publication = input._1._2 val item = if (input._2 != null) input._2._2 else null - val instanceType:Option[Instance] = extractInstance(publication) + val instanceType: Option[Instance] = extractInstance(publication) if (instanceType.isDefined) { - publication.getInstance().asScala.foreach(i => i.setInstancetype(instanceType.get.getInstancetype)) + publication + .getInstance() + .asScala + .foreach(i => i.setInstancetype(instanceType.get.getInstancetype)) } - publication.getInstance().asScala.foreach(i => { - var hb = new KeyValue - if (item != null) { - hb.setValue(item.officialname) - hb.setKey(item.id) - if (item.openAccess) { - i.setAccessright(getOpenAccessQualifier()) - i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) - } + publication + .getInstance() + .asScala + .foreach(i => { + var hb = new KeyValue + if (item != null) { + hb.setValue(item.officialname) + hb.setKey(item.id) + if (item.openAccess) { + i.setAccessright(getOpenAccessQualifier()) + i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) + } - } - else { - hb = ModelConstants.UNKNOWN_REPOSITORY - } - i.setHostedby(hb) - }) + } else { + hb = ModelConstants.UNKNOWN_REPOSITORY + } + i.setHostedby(hb) + }) publication.setBestaccessright(OafMapperUtils.createBestAccessRights(publication.getInstance())) @@ -270,17 +320,22 @@ object DoiBoostMappingUtil { if (publication.getTitle == null || publication.getTitle.size == 0) return false - - val s = publication.getTitle.asScala.count(p => p.getValue != null - && p.getValue.nonEmpty && !p.getValue.equalsIgnoreCase("[NO TITLE AVAILABLE]")) + val s = publication.getTitle.asScala.count(p => + p.getValue != null + && p.getValue.nonEmpty && !p.getValue.equalsIgnoreCase("[NO TITLE AVAILABLE]") + ) if (s == 0) return false // fixes #4360 (test publisher) - val publisher = if (publication.getPublisher != null) publication.getPublisher.getValue else null + val publisher = + if (publication.getPublisher != null) publication.getPublisher.getValue else null - if (publisher != null && (publisher.equalsIgnoreCase("Test accounts") || publisher.equalsIgnoreCase("CrossRef Test Account"))) { + if ( + publisher != null && (publisher.equalsIgnoreCase("Test accounts") || publisher + .equalsIgnoreCase("CrossRef Test Account")) + ) { return false; } @@ -288,18 +343,12 @@ object DoiBoostMappingUtil { if (publication.getAuthor == null || publication.getAuthor.size() == 0) return false - //filter invalid author val authors = publication.getAuthor.asScala.map(s => { if (s.getFullname.nonEmpty) { s.getFullname - } - else - s"${ - s.getName - } ${ - s.getSurname - }" + } else + s"${s.getName} ${s.getSurname}" }) val c = authors.count(isValidAuthorName) @@ -307,13 +356,16 @@ object DoiBoostMappingUtil { return false // fixes #4368 - if (authors.count(s => s.equalsIgnoreCase("Addie Jackson")) > 0 && "Elsevier BV".equalsIgnoreCase(publication.getPublisher.getValue)) + if ( + authors.count(s => s.equalsIgnoreCase("Addie Jackson")) > 0 && "Elsevier BV".equalsIgnoreCase( + publication.getPublisher.getValue + ) + ) return false true } - def isValidAuthorName(fullName: String): Boolean = { if (fullName == null || fullName.isEmpty) return false @@ -322,32 +374,47 @@ object DoiBoostMappingUtil { true } - def generateDataInfo(trust: String): DataInfo = { val di = new DataInfo di.setDeletedbyinference(false) di.setInferred(false) di.setInvisible(false) di.setTrust(trust) - di.setProvenanceaction(OafMapperUtils.qualifier(ModelConstants.SYSIMPORT_ACTIONSET,ModelConstants.SYSIMPORT_ACTIONSET, ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS)) + di.setProvenanceaction( + OafMapperUtils.qualifier( + ModelConstants.SYSIMPORT_ACTIONSET, + ModelConstants.SYSIMPORT_ACTIONSET, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS + ) + ) di } - - - def createSP(value: String, classId: String,className:String, schemeId: String, schemeName:String): StructuredProperty = { + def createSP( + value: String, + classId: String, + className: String, + schemeId: String, + schemeName: String + ): StructuredProperty = { val sp = new StructuredProperty - sp.setQualifier(OafMapperUtils.qualifier(classId,className, schemeId, schemeName)) + sp.setQualifier(OafMapperUtils.qualifier(classId, className, schemeId, schemeName)) sp.setValue(value) sp } - - - def createSP(value: String, classId: String,className:String, schemeId: String, schemeName:String, dataInfo: DataInfo): StructuredProperty = { + def createSP( + value: String, + classId: String, + className: String, + schemeId: String, + schemeName: String, + dataInfo: DataInfo + ): StructuredProperty = { val sp = new StructuredProperty - sp.setQualifier(OafMapperUtils.qualifier(classId,className, schemeId, schemeName)) + sp.setQualifier(OafMapperUtils.qualifier(classId, className, schemeId, schemeName)) sp.setValue(value) sp.setDataInfo(dataInfo) sp @@ -356,17 +423,20 @@ object DoiBoostMappingUtil { def createSP(value: String, classId: String, schemeId: String): StructuredProperty = { val sp = new StructuredProperty - sp.setQualifier(OafMapperUtils.qualifier(classId,classId, schemeId, schemeId)) + sp.setQualifier(OafMapperUtils.qualifier(classId, classId, schemeId, schemeId)) sp.setValue(value) sp } - - - def createSP(value: String, classId: String, schemeId: String, dataInfo: DataInfo): StructuredProperty = { + def createSP( + value: String, + classId: String, + schemeId: String, + dataInfo: DataInfo + ): StructuredProperty = { val sp = new StructuredProperty - sp.setQualifier(OafMapperUtils.qualifier(classId,classId, schemeId, schemeId)) + sp.setQualifier(OafMapperUtils.qualifier(classId, classId, schemeId, schemeId)) sp.setValue(value) sp.setDataInfo(dataInfo) sp @@ -382,7 +452,6 @@ object DoiBoostMappingUtil { } - def createUnpayWallCollectedFrom(): KeyValue = { val cf = new KeyValue @@ -401,15 +470,11 @@ object DoiBoostMappingUtil { } - - def generateIdentifier (oaf: Result, doi: String): String = { - val id = DHPUtils.md5 (doi.toLowerCase) + def generateIdentifier(oaf: Result, doi: String): String = { + val id = DHPUtils.md5(doi.toLowerCase) s"50|${doiBoostNSPREFIX}${SEPARATOR}${id}" } - - - def createMAGCollectedFrom(): KeyValue = { val cf = new KeyValue @@ -424,19 +489,21 @@ object DoiBoostMappingUtil { tmp.setValue(value) tmp - } def isEmpty(x: String) = x == null || x.trim.isEmpty - def normalizeDoi(input : String) :String ={ - if(input == null) + def normalizeDoi(input: String): String = { + if (input == null) return null - val replaced = input.replaceAll("(?:\\n|\\r|\\t|\\s)", "").toLowerCase.replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX) - if (isEmpty(replaced)) + val replaced = input + .replaceAll("(?:\\n|\\r|\\t|\\s)", "") + .toLowerCase + .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX) + if (isEmpty(replaced)) return null - if(replaced.indexOf("10.") < 0) + if (replaced.indexOf("10.") < 0) return null val ret = replaced.substring(replaced.indexOf("10.")) @@ -446,9 +513,6 @@ object DoiBoostMappingUtil { return ret - } - - } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala index f13900abe..b6152526d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala @@ -17,22 +17,29 @@ object SparkGenerateDOIBoostActionSet { def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/generate_doiboost_as_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/generate_doiboost_as_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication] implicit val mapEncoderOrg: Encoder[Organization] = Encoders.kryo[Organization] implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset] implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation] - implicit val mapEncoderAS: Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING) + implicit val mapEncoderAS: Encoder[(String, String)] = + Encoders.tuple(Encoders.STRING, Encoders.STRING) - implicit val mapEncoderAtomiAction: Encoder[AtomicAction[OafDataset]] = Encoders.kryo[AtomicAction[OafDataset]] + implicit val mapEncoderAtomiAction: Encoder[AtomicAction[OafDataset]] = + Encoders.kryo[AtomicAction[OafDataset]] val dbPublicationPath = parser.get("dbPublicationPath") val dbDatasetPath = parser.get("dbDatasetPath") @@ -41,35 +48,61 @@ object SparkGenerateDOIBoostActionSet { val dbOrganizationPath = parser.get("dbOrganizationPath") val sequenceFilePath = parser.get("sFilePath") - val asDataset = spark.read.load(dbDatasetPath).as[OafDataset] + val asDataset = spark.read + .load(dbDatasetPath) + .as[OafDataset] .filter(p => p != null || p.getId != null) .map(d => DoiBoostMappingUtil.fixResult(d)) - .map(d => DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) + .map(d => DoiBoostMappingUtil.toActionSet(d))( + Encoders.tuple(Encoders.STRING, Encoders.STRING) + ) - - val asPublication = spark.read.load(dbPublicationPath).as[Publication] + val asPublication = spark.read + .load(dbPublicationPath) + .as[Publication] .filter(p => p != null || p.getId != null) - .map(d => DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) + .map(d => DoiBoostMappingUtil.toActionSet(d))( + Encoders.tuple(Encoders.STRING, Encoders.STRING) + ) + val asOrganization = spark.read + .load(dbOrganizationPath) + .as[Organization] + .map(d => DoiBoostMappingUtil.toActionSet(d))( + Encoders.tuple(Encoders.STRING, Encoders.STRING) + ) - val asOrganization = spark.read.load(dbOrganizationPath).as[Organization] - .map(d => DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) - - - val asCRelation = spark.read.load(crossRefRelation).as[Relation] + val asCRelation = spark.read + .load(crossRefRelation) + .as[Relation] .filter(r => r != null && r.getSource != null && r.getTarget != null) - .map(d => DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) + .map(d => DoiBoostMappingUtil.toActionSet(d))( + Encoders.tuple(Encoders.STRING, Encoders.STRING) + ) + val asRelAffiliation = spark.read + .load(dbaffiliationRelationPath) + .as[Relation] + .map(d => DoiBoostMappingUtil.toActionSet(d))( + Encoders.tuple(Encoders.STRING, Encoders.STRING) + ) - val asRelAffiliation = spark.read.load(dbaffiliationRelationPath).as[Relation] - .map(d => DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) - - - val d: Dataset[(String, String)] = asDataset.union(asPublication).union(asOrganization).union(asCRelation).union(asRelAffiliation) - - - d.rdd.repartition(6000).map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$sequenceFilePath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text, Text]], classOf[GzipCodec]) + val d: Dataset[(String, String)] = asDataset + .union(asPublication) + .union(asOrganization) + .union(asCRelation) + .union(asRelAffiliation) + d.rdd + .repartition(6000) + .map(s => (new Text(s._1), new Text(s._2))) + .saveAsHadoopFile( + s"$sequenceFilePath", + classOf[Text], + classOf[Text], + classOf[SequenceFileOutputFormat[Text, Text]], + classOf[GzipCodec] + ) } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index 91fe56cba..239082d45 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -15,8 +15,8 @@ import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ -object SparkGenerateDoiBoost { +object SparkGenerateDoiBoost { def extractIdGRID(input: String): List[(String, String)] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -26,28 +26,32 @@ object SparkGenerateDoiBoost { val grids: List[String] = for { - JObject(pid) <- json \ "pid" + JObject(pid) <- json \ "pid" JField("qualifier", JObject(qualifier)) <- pid - JField("classid", JString(classid)) <- qualifier - JField("value", JString(vl)) <- pid + JField("classid", JString(classid)) <- qualifier + JField("value", JString(vl)) <- pid if classid == "GRID" } yield vl grids.map(g => (id, s"unresolved::grid::${g.toLowerCase}"))(collection.breakOut) } - def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() import spark.implicits._ @@ -55,7 +59,8 @@ object SparkGenerateDoiBoost { val workingDirPath = parser.get("workingPath") val openaireOrganizationPath = parser.get("openaireOrganizationPath") - val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable { + val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] + with Serializable { override def zero: Publication = new Publication override def reduce(b: Publication, a: (String, Publication)): Publication = { @@ -65,8 +70,7 @@ object SparkGenerateDoiBoost { a._2.setId(a._1) return a._2 } - } - else { + } else { if (a != null && a._2 != null) { b.mergeFrom(a._2) b.setId(a._1) @@ -82,8 +86,7 @@ object SparkGenerateDoiBoost { if (b1 == null) { if (b2 != null) return b2 - } - else { + } else { if (b2 != null) { b1.mergeFrom(b2) val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor) @@ -103,17 +106,19 @@ object SparkGenerateDoiBoost { override def outputEncoder: Encoder[Publication] = Encoders.kryo[Publication] } - implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication] implicit val mapEncoderOrg: Encoder[Organization] = Encoders.kryo[Organization] implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset] - implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPub) + implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = + Encoders.tuple(Encoders.STRING, mapEncoderPub) implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation] logger.info("Phase 2) Join Crossref with UnpayWall") - val crossrefPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p)) - val uwPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/uwPublication").as[Publication].map(p => (p.getId, p)) + val crossrefPublication: Dataset[(String, Publication)] = + spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p)) + val uwPublication: Dataset[(String, Publication)] = + spark.read.load(s"$workingDirPath/uwPublication").as[Publication].map(p => (p.getId, p)) def applyMerge(item: ((String, Publication), (String, Publication))): Publication = { val crossrefPub = item._1._2 @@ -127,86 +132,140 @@ object SparkGenerateDoiBoost { crossrefPub } - crossrefPublication.joinWith(uwPublication, crossrefPublication("_1").equalTo(uwPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/firstJoin") + crossrefPublication + .joinWith(uwPublication, crossrefPublication("_1").equalTo(uwPublication("_1")), "left") + .map(applyMerge) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingDirPath/firstJoin") logger.info("Phase 3) Join Result with ORCID") - val fj: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p)) - val orcidPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p)) - fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/secondJoin") + val fj: Dataset[(String, Publication)] = + spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p)) + val orcidPublication: Dataset[(String, Publication)] = + spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p)) + fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left") + .map(applyMerge) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingDirPath/secondJoin") logger.info("Phase 4) Join Result with MAG") - val sj: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p)) + val sj: Dataset[(String, Publication)] = + spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p)) - val magPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p)) - sj.joinWith(magPublication, sj("_1").equalTo(magPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublication") + val magPublication: Dataset[(String, Publication)] = + spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p)) + sj.joinWith(magPublication, sj("_1").equalTo(magPublication("_1")), "left") + .map(applyMerge) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingDirPath/doiBoostPublication") + val doiBoostPublication: Dataset[(String, Publication)] = spark.read + .load(s"$workingDirPath/doiBoostPublication") + .as[Publication] + .filter(p => DoiBoostMappingUtil.filterPublication(p)) + .map(DoiBoostMappingUtil.toISSNPair)(tupleForJoinEncoder) - val doiBoostPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/doiBoostPublication").as[Publication].filter(p => DoiBoostMappingUtil.filterPublication(p)).map(DoiBoostMappingUtil.toISSNPair)(tupleForJoinEncoder) + val hostedByDataset: Dataset[(String, HostedByItemType)] = spark.createDataset( + spark.sparkContext.textFile(hostedByMapPath).map(DoiBoostMappingUtil.toHostedByItem) + ) - val hostedByDataset: Dataset[(String, HostedByItemType)] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath).map(DoiBoostMappingUtil.toHostedByItem)) - - - doiBoostPublication.joinWith(hostedByDataset, doiBoostPublication("_1").equalTo(hostedByDataset("_1")), "left") + doiBoostPublication + .joinWith(hostedByDataset, doiBoostPublication("_1").equalTo(hostedByDataset("_1")), "left") .map(DoiBoostMappingUtil.fixPublication) .map(p => (p.getId, p)) .groupByKey(_._1) .agg(crossrefAggregator.toColumn) .map(p => p._2) - .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationFiltered") + .write + .mode(SaveMode.Overwrite) + .save(s"$workingDirPath/doiBoostPublicationFiltered") val affiliationPath = parser.get("affiliationPath") val paperAffiliationPath = parser.get("paperAffiliationPath") - val affiliation = spark.read.load(affiliationPath).select(col("AffiliationId"), col("GridId"), col("OfficialPage"), col("DisplayName")) - - val paperAffiliation = spark.read.load(paperAffiliationPath).select(col("AffiliationId").alias("affId"), col("PaperId")) + val affiliation = spark.read + .load(affiliationPath) + .select(col("AffiliationId"), col("GridId"), col("OfficialPage"), col("DisplayName")) + val paperAffiliation = spark.read + .load(paperAffiliationPath) + .select(col("AffiliationId").alias("affId"), col("PaperId")) val a: Dataset[DoiBoostAffiliation] = paperAffiliation .joinWith(affiliation, paperAffiliation("affId").equalTo(affiliation("AffiliationId"))) - .select(col("_1.PaperId"), col("_2.AffiliationId"), col("_2.GridId"), col("_2.OfficialPage"), col("_2.DisplayName")).as[DoiBoostAffiliation] + .select( + col("_1.PaperId"), + col("_2.AffiliationId"), + col("_2.GridId"), + col("_2.OfficialPage"), + col("_2.DisplayName") + ) + .as[DoiBoostAffiliation] + val magPubs: Dataset[(String, Publication)] = spark.read + .load(s"$workingDirPath/doiBoostPublicationFiltered") + .as[Publication] + .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p))( + tupleForJoinEncoder + ) + .filter(s => s._1 != null) - val magPubs: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/doiBoostPublicationFiltered").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p))(tupleForJoinEncoder).filter(s => s._1 != null) + magPubs + .joinWith(a, magPubs("_1").equalTo(a("PaperId"))) + .flatMap(item => { + val pub: Publication = item._1._2 + val affiliation = item._2 + val affId: String = + if (affiliation.GridId.isDefined) + s"unresolved::grid::${affiliation.GridId.get.toLowerCase}" + else DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString) + val r: Relation = new Relation + r.setSource(pub.getId) + r.setTarget(affId) + r.setRelType(ModelConstants.RESULT_ORGANIZATION) + r.setRelClass(ModelConstants.HAS_AUTHOR_INSTITUTION) + r.setSubRelType(ModelConstants.AFFILIATION) + r.setDataInfo(pub.getDataInfo) + r.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) + val r1: Relation = new Relation + r1.setTarget(pub.getId) + r1.setSource(affId) + r1.setRelType(ModelConstants.RESULT_ORGANIZATION) + r1.setRelClass(ModelConstants.IS_AUTHOR_INSTITUTION_OF) + r1.setSubRelType(ModelConstants.AFFILIATION) + r1.setDataInfo(pub.getDataInfo) + r1.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) + List(r, r1) + })(mapEncoderRel) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved") + val unresolvedRels: Dataset[(String, Relation)] = spark.read + .load(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved") + .as[Relation] + .map(r => { - magPubs.joinWith(a, magPubs("_1").equalTo(a("PaperId"))).flatMap(item => { - val pub: Publication = item._1._2 - val affiliation = item._2 - val affId: String = if (affiliation.GridId.isDefined) s"unresolved::grid::${affiliation.GridId.get.toLowerCase}" else DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString) - val r: Relation = new Relation - r.setSource(pub.getId) - r.setTarget(affId) - r.setRelType(ModelConstants.RESULT_ORGANIZATION) - r.setRelClass(ModelConstants.HAS_AUTHOR_INSTITUTION) - r.setSubRelType(ModelConstants.AFFILIATION) - r.setDataInfo(pub.getDataInfo) - r.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) - val r1: Relation = new Relation - r1.setTarget(pub.getId) - r1.setSource(affId) - r1.setRelType(ModelConstants.RESULT_ORGANIZATION) - r1.setRelClass(ModelConstants.IS_AUTHOR_INSTITUTION_OF) - r1.setSubRelType(ModelConstants.AFFILIATION) - r1.setDataInfo(pub.getDataInfo) - r1.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) - List(r, r1) - })(mapEncoderRel).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved") + if (r.getSource.startsWith("unresolved")) + (r.getSource, r) + else if (r.getTarget.startsWith("unresolved")) + (r.getTarget, r) + else + ("resolved", r) + })(Encoders.tuple(Encoders.STRING, mapEncoderRel)) + val openaireOrganization: Dataset[(String, String)] = spark.read + .text(openaireOrganizationPath) + .as[String] + .flatMap(s => extractIdGRID(s)) + .groupByKey(_._2) + .reduceGroups((x, y) => if (x != null) x else y) + .map(_._2) - val unresolvedRels: Dataset[(String, Relation)] = spark.read.load(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved").as[Relation].map(r => { - - if (r.getSource.startsWith("unresolved")) - (r.getSource, r) - else if (r.getTarget.startsWith("unresolved")) - (r.getTarget, r) - else - ("resolved", r) - })(Encoders.tuple(Encoders.STRING, mapEncoderRel)) - - val openaireOrganization: Dataset[(String, String)] = spark.read.text(openaireOrganizationPath).as[String].flatMap(s => extractIdGRID(s)).groupByKey(_._2).reduceGroups((x, y) => if (x != null) x else y).map(_._2) - - unresolvedRels.joinWith(openaireOrganization, unresolvedRels("_1").equalTo(openaireOrganization("_2"))) + unresolvedRels + .joinWith(openaireOrganization, unresolvedRels("_1").equalTo(openaireOrganization("_2"))) .map { x => val currentRels = x._1._2 val currentOrgs = x._2 @@ -216,26 +275,35 @@ object SparkGenerateDoiBoost { else currentRels.setTarget(currentOrgs._1) currentRels - }.filter(r => !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation") - - magPubs.joinWith(a, magPubs("_1").equalTo(a("PaperId"))).map(item => { - val affiliation = item._2 - if (affiliation.GridId.isEmpty) { - val o = new Organization - o.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) - o.setDataInfo(DoiBoostMappingUtil.generateDataInfo()) - o.setId(DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString)) - o.setOriginalId(List(affiliation.AffiliationId.toString).asJava) - if (affiliation.DisplayName.nonEmpty) - o.setLegalname(DoiBoostMappingUtil.asField(affiliation.DisplayName.get)) - if (affiliation.OfficialPage.isDefined) - o.setWebsiteurl(DoiBoostMappingUtil.asField(affiliation.OfficialPage.get)) - o.setCountry(ModelConstants.UNKNOWN_COUNTRY) - o } - else - null - }).filter(o => o != null).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostOrganization") + .filter(r => !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingDirPath/doiBoostPublicationAffiliation") + + magPubs + .joinWith(a, magPubs("_1").equalTo(a("PaperId"))) + .map(item => { + val affiliation = item._2 + if (affiliation.GridId.isEmpty) { + val o = new Organization + o.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) + o.setDataInfo(DoiBoostMappingUtil.generateDataInfo()) + o.setId(DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString)) + o.setOriginalId(List(affiliation.AffiliationId.toString).asJava) + if (affiliation.DisplayName.nonEmpty) + o.setLegalname(DoiBoostMappingUtil.asField(affiliation.DisplayName.get)) + if (affiliation.OfficialPage.isDefined) + o.setWebsiteurl(DoiBoostMappingUtil.asField(affiliation.OfficialPage.get)) + o.setCountry(ModelConstants.UNKNOWN_COUNTRY) + o + } else + null + }) + .filter(o => o != null) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingDirPath/doiBoostOrganization") } } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index edca4a180..60f4e88c8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -18,70 +18,74 @@ import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex -case class CrossrefDT(doi: String, json:String, timestamp: Long) {} +case class CrossrefDT(doi: String, json: String, timestamp: Long) {} case class mappingAffiliation(name: String) {} -case class mappingAuthor(given: Option[String], family: String, sequence:Option[String], ORCID: Option[String], affiliation: Option[mappingAffiliation]) {} +case class mappingAuthor( + given: Option[String], + family: String, + sequence: Option[String], + ORCID: Option[String], + affiliation: Option[mappingAffiliation] +) {} case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {} - case object Crossref2Oaf { val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass) val mappingCrossrefType = Map( - "book-section" -> "publication", - "book" -> "publication", - "book-chapter" -> "publication", - "book-part" -> "publication", - "book-series" -> "publication", - "book-set" -> "publication", - "book-track" -> "publication", - "edited-book" -> "publication", - "reference-book" -> "publication", - "monograph" -> "publication", - "journal-article" -> "publication", - "dissertation" -> "publication", - "other" -> "publication", - "peer-review" -> "publication", - "proceedings" -> "publication", + "book-section" -> "publication", + "book" -> "publication", + "book-chapter" -> "publication", + "book-part" -> "publication", + "book-series" -> "publication", + "book-set" -> "publication", + "book-track" -> "publication", + "edited-book" -> "publication", + "reference-book" -> "publication", + "monograph" -> "publication", + "journal-article" -> "publication", + "dissertation" -> "publication", + "other" -> "publication", + "peer-review" -> "publication", + "proceedings" -> "publication", "proceedings-article" -> "publication", - "reference-entry" -> "publication", - "report" -> "publication", - "report-series" -> "publication", - "standard" -> "publication", - "standard-series" -> "publication", - "posted-content" -> "publication", - "dataset" -> "dataset" + "reference-entry" -> "publication", + "report" -> "publication", + "report-series" -> "publication", + "standard" -> "publication", + "standard-series" -> "publication", + "posted-content" -> "publication", + "dataset" -> "dataset" ) - val mappingCrossrefSubType = Map( - "book-section" -> "0013 Part of book or chapter of book", - "book" -> "0002 Book", - "book-chapter" -> "0013 Part of book or chapter of book", - "book-part" -> "0013 Part of book or chapter of book", - "book-series" -> "0002 Book", - "book-set" -> "0002 Book", - "book-track" -> "0002 Book", - "edited-book" -> "0002 Book", - "reference-book" -> "0002 Book", - "monograph" -> "0002 Book", - "journal-article" -> "0001 Article", - "dissertation" -> "0044 Thesis", - "other" -> "0038 Other literature type", - "peer-review" -> "0015 Review", - "proceedings" -> "0004 Conference object", + "book-section" -> "0013 Part of book or chapter of book", + "book" -> "0002 Book", + "book-chapter" -> "0013 Part of book or chapter of book", + "book-part" -> "0013 Part of book or chapter of book", + "book-series" -> "0002 Book", + "book-set" -> "0002 Book", + "book-track" -> "0002 Book", + "edited-book" -> "0002 Book", + "reference-book" -> "0002 Book", + "monograph" -> "0002 Book", + "journal-article" -> "0001 Article", + "dissertation" -> "0044 Thesis", + "other" -> "0038 Other literature type", + "peer-review" -> "0015 Review", + "proceedings" -> "0004 Conference object", "proceedings-article" -> "0004 Conference object", - "reference-entry" -> "0013 Part of book or chapter of book", - "report" -> "0017 Report", - "report-series" -> "0017 Report", - "standard" -> "0038 Other literature type", - "standard-series" -> "0038 Other literature type", - "dataset" -> "0021 Dataset", - "preprint" -> "0016 Preprint", - "report" -> "0017 Report" + "reference-entry" -> "0013 Part of book or chapter of book", + "report" -> "0017 Report", + "report-series" -> "0017 Report", + "standard" -> "0038 Other literature type", + "standard-series" -> "0038 Other literature type", + "dataset" -> "0021 Dataset", + "preprint" -> "0016 Preprint", + "report" -> "0017 Report" ) def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { @@ -100,7 +104,6 @@ case object Crossref2Oaf { val originalIds = new util.ArrayList(tmp.filter(id => id != null).asJava) result.setOriginalId(originalIds) - // Add DataInfo result.setDataInfo(generateDataInfo()) @@ -111,98 +114,169 @@ case object Crossref2Oaf { // Publisher ( Name of work's publisher mapped into Result/Publisher) val publisher = (json \ "publisher").extractOrElse[String](null) - if (publisher!= null && publisher.nonEmpty) + if (publisher != null && publisher.nonEmpty) result.setPublisher(asField(publisher)) - // TITLE - val mainTitles = for {JString(title) <- json \ "title" if title.nonEmpty} yield createSP(title, "main title", ModelConstants.DNET_DATACITE_TITLE) - val originalTitles = for {JString(title) <- json \ "original-title" if title.nonEmpty} yield createSP(title, "alternative title", ModelConstants.DNET_DATACITE_TITLE) - val shortTitles = for {JString(title) <- json \ "short-title" if title.nonEmpty} yield createSP(title, "alternative title", ModelConstants.DNET_DATACITE_TITLE) - val subtitles = for {JString(title) <- json \ "subtitle" if title.nonEmpty} yield createSP(title, "subtitle", ModelConstants.DNET_DATACITE_TITLE) + val mainTitles = + for { JString(title) <- json \ "title" if title.nonEmpty } yield createSP( + title, + "main title", + ModelConstants.DNET_DATACITE_TITLE + ) + val originalTitles = for { + JString(title) <- json \ "original-title" if title.nonEmpty + } yield createSP(title, "alternative title", ModelConstants.DNET_DATACITE_TITLE) + val shortTitles = for { + JString(title) <- json \ "short-title" if title.nonEmpty + } yield createSP(title, "alternative title", ModelConstants.DNET_DATACITE_TITLE) + val subtitles = + for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield createSP( + title, + "subtitle", + ModelConstants.DNET_DATACITE_TITLE + ) result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava) // DESCRIPTION - val descriptionList = for {JString(description) <- json \ "abstract"} yield asField(description) + val descriptionList = + for { JString(description) <- json \ "abstract" } yield asField(description) result.setDescription(descriptionList.asJava) // Source - val sourceList = for {JString(source) <- json \ "source" if source!= null && source.nonEmpty} yield asField(source) + val sourceList = for { + JString(source) <- json \ "source" if source != null && source.nonEmpty + } yield asField(source) result.setSource(sourceList.asJava) //RELEVANT DATE Mapping - val createdDate = generateDate((json \ "created" \ "date-time").extract[String], (json \ "created" \ "date-parts").extract[List[List[Int]]], "created", ModelConstants.DNET_DATACITE_DATE) - val postedDate = generateDate((json \ "posted" \ "date-time").extractOrElse[String](null), (json \ "posted" \ "date-parts").extract[List[List[Int]]], "available", ModelConstants.DNET_DATACITE_DATE) - val acceptedDate = generateDate((json \ "accepted" \ "date-time").extractOrElse[String](null), (json \ "accepted" \ "date-parts").extract[List[List[Int]]], "accepted", ModelConstants.DNET_DATACITE_DATE) - val publishedPrintDate = generateDate((json \ "published-print" \ "date-time").extractOrElse[String](null), (json \ "published-print" \ "date-parts").extract[List[List[Int]]], "published-print", ModelConstants.DNET_DATACITE_DATE) - val publishedOnlineDate = generateDate((json \ "published-online" \ "date-time").extractOrElse[String](null), (json \ "published-online" \ "date-parts").extract[List[List[Int]]], "published-online", ModelConstants.DNET_DATACITE_DATE) + val createdDate = generateDate( + (json \ "created" \ "date-time").extract[String], + (json \ "created" \ "date-parts").extract[List[List[Int]]], + "created", + ModelConstants.DNET_DATACITE_DATE + ) + val postedDate = generateDate( + (json \ "posted" \ "date-time").extractOrElse[String](null), + (json \ "posted" \ "date-parts").extract[List[List[Int]]], + "available", + ModelConstants.DNET_DATACITE_DATE + ) + val acceptedDate = generateDate( + (json \ "accepted" \ "date-time").extractOrElse[String](null), + (json \ "accepted" \ "date-parts").extract[List[List[Int]]], + "accepted", + ModelConstants.DNET_DATACITE_DATE + ) + val publishedPrintDate = generateDate( + (json \ "published-print" \ "date-time").extractOrElse[String](null), + (json \ "published-print" \ "date-parts").extract[List[List[Int]]], + "published-print", + ModelConstants.DNET_DATACITE_DATE + ) + val publishedOnlineDate = generateDate( + (json \ "published-online" \ "date-time").extractOrElse[String](null), + (json \ "published-online" \ "date-parts").extract[List[List[Int]]], + "published-online", + ModelConstants.DNET_DATACITE_DATE + ) - val issuedDate = extractDate((json \ "issued" \ "date-time").extractOrElse[String](null), (json \ "issued" \ "date-parts").extract[List[List[Int]]]) + val issuedDate = extractDate( + (json \ "issued" \ "date-time").extractOrElse[String](null), + (json \ "issued" \ "date-parts").extract[List[List[Int]]] + ) if (StringUtils.isNotBlank(issuedDate)) { result.setDateofacceptance(asField(issuedDate)) - } - else { + } else { result.setDateofacceptance(asField(createdDate.getValue)) } - result.setRelevantdate(List(createdDate, postedDate, acceptedDate, publishedOnlineDate, publishedPrintDate).filter(p => p != null).asJava) + result.setRelevantdate( + List(createdDate, postedDate, acceptedDate, publishedOnlineDate, publishedPrintDate) + .filter(p => p != null) + .asJava + ) //Mapping Subject - val subjectList:List[String] = (json \ "subject").extractOrElse[List[String]](List()) + val subjectList: List[String] = (json \ "subject").extractOrElse[List[String]](List()) if (subjectList.nonEmpty) { - result.setSubject(subjectList.map(s=> createSP(s, "keywords", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava) + result.setSubject( + subjectList.map(s => createSP(s, "keywords", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava + ) } - - //Mapping Author - val authorList: List[mappingAuthor] = (json \ "author").extractOrElse[List[mappingAuthor]](List()) + val authorList: List[mappingAuthor] = + (json \ "author").extractOrElse[List[mappingAuthor]](List()) + val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) => + a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first") + ) - - val sorted_list = authorList.sortWith((a:mappingAuthor, b:mappingAuthor) => a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first")) - - result.setAuthor(sorted_list.zipWithIndex.map{case (a, index) => generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index)}.asJava) + result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) => + generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index) + }.asJava) // Mapping instance val instance = new Instance() val license = for { - JObject(license) <- json \ "license" - JField("URL", JString(lic)) <- license + JObject(license) <- json \ "license" + JField("URL", JString(lic)) <- license JField("content-version", JString(content_version)) <- license } yield (asField(lic), content_version) val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue)) - if (l.nonEmpty){ - if (l exists (d => d._2.equals("vor"))){ - for(d <- l){ - if (d._2.equals("vor")){ + if (l.nonEmpty) { + if (l exists (d => d._2.equals("vor"))) { + for (d <- l) { + if (d._2.equals("vor")) { instance.setLicense(d._1) } } + } else { + instance.setLicense(l.head._1) } - else{ - instance.setLicense(l.head._1)} } // Ticket #6281 added pid to Instance instance.setPid(result.getPid) - val has_review = json \ "relation" \"has-review" \ "id" + val has_review = json \ "relation" \ "has-review" \ "id" - if(has_review != JNothing) { + if (has_review != JNothing) { instance.setRefereed( - OafMapperUtils.qualifier("0001", "peerReviewed", ModelConstants.DNET_REVIEW_LEVELS, ModelConstants.DNET_REVIEW_LEVELS)) + OafMapperUtils.qualifier( + "0001", + "peerReviewed", + ModelConstants.DNET_REVIEW_LEVELS, + ModelConstants.DNET_REVIEW_LEVELS + ) + ) } - instance.setAccessright(decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)) - instance.setInstancetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) - result.setResourcetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) + instance.setAccessright( + decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue) + ) + instance.setInstancetype( + OafMapperUtils.qualifier( + cobjCategory.substring(0, 4), + cobjCategory.substring(5), + ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE + ) + ) + result.setResourcetype( + OafMapperUtils.qualifier( + cobjCategory.substring(0, 4), + cobjCategory.substring(5), + ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE + ) + ) instance.setCollectedfrom(createCrossrefCollectedFrom()) if (StringUtils.isNotBlank(issuedDate)) { instance.setDateofacceptance(asField(issuedDate)) - } - else { + } else { instance.setDateofacceptance(asField(createdDate.getValue)) } val s: List[String] = List("https://doi.org/" + doi) @@ -210,10 +284,9 @@ case object Crossref2Oaf { // if (links.nonEmpty) { // instance.setUrl(links.asJava) // } - if(s.nonEmpty) - { - instance.setUrl(s.asJava) - } + if (s.nonEmpty) { + instance.setUrl(s.asJava) + } result.setInstance(List(instance).asJava) @@ -236,15 +309,23 @@ case object Crossref2Oaf { result } - - def generateAuhtor(given: String, family: String, orcid: String, index:Int): Author = { + def generateAuhtor(given: String, family: String, orcid: String, index: Int): Author = { val a = new Author a.setName(given) a.setSurname(family) a.setFullname(s"$given $family") - a.setRank(index+1) + a.setRank(index + 1) if (StringUtils.isNotBlank(orcid)) - a.setPid(List(createSP(orcid, ModelConstants.ORCID_PENDING, ModelConstants.DNET_PID_TYPES, generateDataInfo())).asJava) + a.setPid( + List( + createSP( + orcid, + ModelConstants.ORCID_PENDING, + ModelConstants.DNET_PID_TYPES, + generateDataInfo() + ) + ).asJava + ) a } @@ -255,54 +336,62 @@ case object Crossref2Oaf { var resultList: List[Oaf] = List() - val objectType = (json \ "type").extractOrElse[String](null) val objectSubType = (json \ "subtype").extractOrElse[String](null) if (objectType == null) return resultList - val result = generateItemFromType(objectType, objectSubType) if (result == null) return List() - val cOBJCategory = mappingCrossrefSubType.getOrElse(objectType, mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")) + val cOBJCategory = mappingCrossrefSubType.getOrElse( + objectType, + mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type") + ) mappingResult(result, json, cOBJCategory) if (result == null || result.getId == null) return List() - - val funderList: List[mappingFunder] = (json \ "funder").extractOrElse[List[mappingFunder]](List()) + val funderList: List[mappingFunder] = + (json \ "funder").extractOrElse[List[mappingFunder]](List()) if (funderList.nonEmpty) { - resultList = resultList ::: mappingFunderToRelations(funderList, result.getId, createCrossrefCollectedFrom(), result.getDataInfo, result.getLastupdatetimestamp) + resultList = resultList ::: mappingFunderToRelations( + funderList, + result.getId, + createCrossrefCollectedFrom(), + result.getDataInfo, + result.getLastupdatetimestamp + ) } - result match { case publication: Publication => convertPublication(publication, json, cOBJCategory) - case dataset: Dataset => convertDataset(dataset) + case dataset: Dataset => convertDataset(dataset) } resultList = resultList ::: List(result) resultList } - - def mappingFunderToRelations(funders: List[mappingFunder], sourceId: String, cf: KeyValue, di: DataInfo, ts: Long): List[Relation] = { + def mappingFunderToRelations( + funders: List[mappingFunder], + sourceId: String, + cf: KeyValue, + di: DataInfo, + ts: Long + ): List[Relation] = { val queue = new mutable.Queue[Relation] - - def snsfRule(award:String): String = { - val tmp1 = StringUtils.substringAfter(award,"_") - val tmp2 = StringUtils.substringBefore(tmp1,"/") + def snsfRule(award: String): String = { + val tmp1 = StringUtils.substringAfter(award, "_") + val tmp2 = StringUtils.substringBefore(tmp1, "/") logger.debug(s"From $award to $tmp2") tmp2 - } - def extractECAward(award: String): String = { val awardECRegex: Regex = "[0-9]{4,9}".r if (awardECRegex.findAllIn(award).hasNext) @@ -310,8 +399,7 @@ case object Crossref2Oaf { null } - - def generateRelation(sourceId:String, targetId:String, relClass:String) :Relation = { + def generateRelation(sourceId: String, targetId: String, relClass: String): Relation = { val r = new Relation r.setSource(sourceId) @@ -324,98 +412,121 @@ case object Crossref2Oaf { r.setLastupdatetimestamp(ts) r - } - - def generateSimpleRelationFromAward(funder: mappingFunder, nsPrefix: String, extractField: String => String): Unit = { + def generateSimpleRelationFromAward( + funder: mappingFunder, + nsPrefix: String, + extractField: String => String + ): Unit = { if (funder.award.isDefined && funder.award.get.nonEmpty) - funder.award.get.map(extractField).filter(a => a!= null && a.nonEmpty).foreach( - award => { + funder.award.get + .map(extractField) + .filter(a => a != null && a.nonEmpty) + .foreach(award => { val targetId = getProjectId(nsPrefix, DHPUtils.md5(award)) - queue += generateRelation(sourceId, targetId , ModelConstants.IS_PRODUCED_BY) - queue += generateRelation(targetId , sourceId, ModelConstants.PRODUCES) - } - ) + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + }) } - def getProjectId (nsPrefix:String, targetId:String):String = { + def getProjectId(nsPrefix: String, targetId: String): String = { s"40|$nsPrefix::$targetId" } - if (funders != null) - funders.foreach(funder => { - if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) { - funder.DOI.get match { - case "10.13039/100010663" | - "10.13039/100010661" | - "10.13039/501100007601" | - "10.13039/501100000780" | - "10.13039/100010665" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - case "10.13039/100011199" | - "10.13039/100004431" | - "10.13039/501100004963" | - "10.13039/501100000780" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward) - case "10.13039/501100000781" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward) - generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - case "10.13039/100000001" => generateSimpleRelationFromAward(funder, "nsf_________", a => a) - case "10.13039/501100001665" => generateSimpleRelationFromAward(funder, "anr_________", a => a) - case "10.13039/501100002341" => generateSimpleRelationFromAward(funder, "aka_________", a => a) - case "10.13039/501100001602" => generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", "")) - case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a) - case "10.13039/501100000038"=> val targetId = getProjectId("nserc_______" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) - queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) - case "10.13039/501100000155"=> val targetId = getProjectId("sshrc_______" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId,targetId, ModelConstants.IS_PRODUCED_BY) - queue += generateRelation(targetId,sourceId, ModelConstants.PRODUCES) - case "10.13039/501100000024"=> val targetId = getProjectId("cihr________" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId,targetId, ModelConstants.IS_PRODUCED_BY) - queue += generateRelation(targetId,sourceId, ModelConstants.PRODUCES) - case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a) - case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward) - case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a=>a) - case "10.13039/501100004564" => generateSimpleRelationFromAward(funder, "mestd_______", extractECAward) - case "10.13039/501100003407" => generateSimpleRelationFromAward(funder, "miur________", a=>a) - val targetId = getProjectId("miur________" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId,targetId, ModelConstants.IS_PRODUCED_BY) - queue += generateRelation(targetId,sourceId, ModelConstants.PRODUCES) - case "10.13039/501100006588" | - "10.13039/501100004488" => generateSimpleRelationFromAward(funder, "irb_hr______", a=>a.replaceAll("Project No.", "").replaceAll("HRZZ-","") ) - case "10.13039/501100006769"=> generateSimpleRelationFromAward(funder, "rsf_________", a=>a) - case "10.13039/501100001711"=> generateSimpleRelationFromAward(funder, "snsf________", snsfRule) - case "10.13039/501100004410"=> generateSimpleRelationFromAward(funder, "tubitakf____", a =>a) - case "10.10.13039/100004440"=> generateSimpleRelationFromAward(funder, "wt__________", a =>a) - case "10.13039/100004440"=> val targetId = getProjectId("wt__________" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId,targetId, ModelConstants.IS_PRODUCED_BY) - queue += generateRelation(targetId,sourceId, ModelConstants.PRODUCES) + funders.foreach(funder => { + if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) { + funder.DOI.get match { + case "10.13039/100010663" | "10.13039/100010661" | + "10.13039/501100007601" | "10.13039/501100000780" | "10.13039/100010665" => + generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) + case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | + "10.13039/501100000780" => + generateSimpleRelationFromAward(funder, "corda_______", extractECAward) + case "10.13039/501100000781" => + generateSimpleRelationFromAward(funder, "corda_______", extractECAward) + generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) + case "10.13039/100000001" => + generateSimpleRelationFromAward(funder, "nsf_________", a => a) + case "10.13039/501100001665" => + generateSimpleRelationFromAward(funder, "anr_________", a => a) + case "10.13039/501100002341" => + generateSimpleRelationFromAward(funder, "aka_________", a => a) + case "10.13039/501100001602" => + generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", "")) + case "10.13039/501100000923" => + generateSimpleRelationFromAward(funder, "arc_________", a => a) + case "10.13039/501100000038" => + val targetId = getProjectId("nserc_______", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + case "10.13039/501100000155" => + val targetId = getProjectId("sshrc_______", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + case "10.13039/501100000024" => + val targetId = getProjectId("cihr________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + case "10.13039/501100002848" => + generateSimpleRelationFromAward(funder, "conicytf____", a => a) + case "10.13039/501100003448" => + generateSimpleRelationFromAward(funder, "gsrt________", extractECAward) + case "10.13039/501100010198" => + generateSimpleRelationFromAward(funder, "sgov________", a => a) + case "10.13039/501100004564" => + generateSimpleRelationFromAward(funder, "mestd_______", extractECAward) + case "10.13039/501100003407" => + generateSimpleRelationFromAward(funder, "miur________", a => a) + val targetId = getProjectId("miur________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + case "10.13039/501100006588" | "10.13039/501100004488" => + generateSimpleRelationFromAward( + funder, + "irb_hr______", + a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", "") + ) + case "10.13039/501100006769" => + generateSimpleRelationFromAward(funder, "rsf_________", a => a) + case "10.13039/501100001711" => + generateSimpleRelationFromAward(funder, "snsf________", snsfRule) + case "10.13039/501100004410" => + generateSimpleRelationFromAward(funder, "tubitakf____", a => a) + case "10.10.13039/100004440" => + generateSimpleRelationFromAward(funder, "wt__________", a => a) + case "10.13039/100004440" => + val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) - case _ => logger.debug("no match for "+funder.DOI.get ) + case _ => logger.debug("no match for " + funder.DOI.get) + } + } else { + funder.name match { + case "European Union’s Horizon 2020 research and innovation program" => + generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) + case "European Union's" => + generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) + generateSimpleRelationFromAward(funder, "corda_______", extractECAward) + case "The French National Research Agency (ANR)" | + "The French National Research Agency" => + generateSimpleRelationFromAward(funder, "anr_________", a => a) + case "CONICYT, Programa de Formación de Capital Humano Avanzado" => + generateSimpleRelationFromAward(funder, "conicytf____", extractECAward) + case "Wellcome Trust Masters Fellowship" => + val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + case _ => logger.debug("no match for " + funder.name) + + } } - - } else { - funder.name match { - case "European Union’s Horizon 2020 research and innovation program" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - case "European Union's" => - generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - generateSimpleRelationFromAward(funder, "corda_______", extractECAward) - case "The French National Research Agency (ANR)" | - "The French National Research Agency" => generateSimpleRelationFromAward(funder, "anr_________", a => a) - case "CONICYT, Programa de Formación de Capital Humano Avanzado" => generateSimpleRelationFromAward(funder, "conicytf____", extractECAward) - case "Wellcome Trust Masters Fellowship" => val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY ) - queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES ) - case _ => logger.debug("no match for "+funder.name ) - - } - } - - } - ) + }) queue.toList } @@ -423,33 +534,31 @@ case object Crossref2Oaf { // TODO check if there are other info to map into the Dataset } - def convertPublication(publication: Publication, json: JValue, cobjCategory: String): Unit = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - val containerTitles = for {JString(ct) <- json \ "container-title"} yield ct - + val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct //Mapping book if (cobjCategory.toLowerCase.contains("book")) { - val ISBN = for {JString(isbn) <- json \ "ISBN"} yield isbn + val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn if (ISBN.nonEmpty && containerTitles.nonEmpty) { val source = s"${containerTitles.head} ISBN: ${ISBN.head}" if (publication.getSource != null) { val l: List[Field[String]] = publication.getSource.asScala.toList val ll: List[Field[String]] = l ::: List(asField(source)) publication.setSource(ll.asJava) - } - else + } else publication.setSource(List(asField(source)).asJava) } } else { // Mapping Journal - val issnInfos = for {JArray(issn_types) <- json \ "issn-type" - JObject(issn_type) <- issn_types - JField("type", JString(tp)) <- issn_type - JField("value", JString(vl)) <- issn_type - } yield Tuple2(tp, vl) + val issnInfos = for { + JArray(issn_types) <- json \ "issn-type" + JObject(issn_type) <- issn_types + JField("type", JString(tp)) <- issn_type + JField("value", JString(vl)) <- issn_type + } yield Tuple2(tp, vl) val volume = (json \ "volume").extractOrElse[String](null) if (containerTitles.nonEmpty) { @@ -460,7 +569,7 @@ case object Crossref2Oaf { issnInfos.foreach(tp => { tp._1 match { case "electronic" => journal.setIssnOnline(tp._2) - case "print" => journal.setIssnPrinted(tp._2) + case "print" => journal.setIssnPrinted(tp._2) } }) } @@ -494,7 +603,12 @@ case object Crossref2Oaf { } - def generateDate(dt: String, datePart: List[List[Int]], classId: String, schemeId: String): StructuredProperty = { + def generateDate( + dt: String, + datePart: List[List[Int]], + classId: String, + schemeId: String + ): StructuredProperty = { val dp = extractDate(dt, datePart) if (StringUtils.isNotBlank(dp)) return createSP(dp, classId, schemeId) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala index 6a1c701af..c6e4706d7 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala @@ -16,7 +16,6 @@ object CrossrefDataset { val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) - def to_item(input: String): CrossrefDT = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -29,19 +28,24 @@ object CrossrefDataset { def main(args: Array[String]): Unit = { - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(CrossrefDataset.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + CrossrefDataset.getClass.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(SparkMapDumpIntoOAF.getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() import spark.implicits._ - val crossrefAggregator = new Aggregator[CrossrefDT, CrossrefDT, CrossrefDT] with Serializable { override def zero: CrossrefDT = null @@ -52,7 +56,6 @@ object CrossrefDataset { if (a == null) return b - if (a.timestamp > b.timestamp) { return a } @@ -80,19 +83,24 @@ object CrossrefDataset { val workingPath: String = parser.get("workingPath") - val main_ds: Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT] - val update = - spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text]) - .map(i => CrossrefImporter.decompressBlob(i._2.toString)) - .map(i => to_item(i))) + spark.createDataset( + spark.sparkContext + .sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text]) + .map(i => CrossrefImporter.decompressBlob(i._2.toString)) + .map(i => to_item(i)) + ) - main_ds.union(update).groupByKey(_.doi) + main_ds + .union(update) + .groupByKey(_.doi) .agg(crossrefAggregator.toColumn) .map(s => s._2) - .write.mode(SaveMode.Overwrite).save(s"$workingPath/crossref_ds_updated") + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/crossref_ds_updated") } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala index 6d03abc25..df185910e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala @@ -18,7 +18,6 @@ object GenerateCrossrefDataset { implicit val mrEncoder: Encoder[CrossrefDT] = Encoders.kryo[CrossrefDT] - def crossrefElement(meta: String): CrossrefDT = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(meta) @@ -30,13 +29,23 @@ object GenerateCrossrefDataset { def main(args: Array[String]): Unit = { val conf = new SparkConf - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json")).mkString) + val parser = new ArgumentApplicationParser( + Source + .fromInputStream( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json" + ) + ) + .mkString + ) parser.parseArgument(args) val master = parser.get("master") val sourcePath = parser.get("sourcePath") val targetPath = parser.get("targetPath") - val spark: SparkSession = SparkSession.builder().config(conf) + val spark: SparkSession = SparkSession + .builder() + .config(conf) .appName(UnpackCrtossrefEntries.getClass.getSimpleName) .master(master) .getOrCreate() @@ -44,12 +53,14 @@ object GenerateCrossrefDataset { import spark.implicits._ - val tmp: RDD[String] = sc.textFile(sourcePath, 6000) - spark.createDataset(tmp) + spark + .createDataset(tmp) .map(entry => crossrefElement(entry)) - .write.mode(SaveMode.Overwrite).save(targetPath) + .write + .mode(SaveMode.Overwrite) + .save(targetPath) // .map(meta => crossrefElement(meta)) // .toDS.as[CrossrefDT] // .write.mode(SaveMode.Overwrite).save(targetPath) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala index fa55b9fb9..96923f000 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala @@ -8,7 +8,6 @@ import org.apache.spark.SparkConf import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} - case class Reference(author: String, firstPage: String) {} object SparkMapDumpIntoOAF { @@ -19,14 +18,21 @@ object SparkMapDumpIntoOAF { val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_crossref_dump_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + SparkMapDumpIntoOAF.getClass.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/convert_crossref_dump_to_oaf_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(SparkMapDumpIntoOAF.getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] @@ -35,19 +41,34 @@ object SparkMapDumpIntoOAF { val targetPath = parser.get("targetPath") - spark.read.load(parser.get("sourcePath")).as[CrossrefDT] + spark.read + .load(parser.get("sourcePath")) + .as[CrossrefDT] .flatMap(k => Crossref2Oaf.convert(k.json)) .filter(o => o != null) - .write.mode(SaveMode.Overwrite).save(s"$targetPath/mixObject") + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/mixObject") - val ds:Dataset[Oaf] = spark.read.load(s"$targetPath/mixObject").as[Oaf] + val ds: Dataset[Oaf] = spark.read.load(s"$targetPath/mixObject").as[Oaf] - ds.filter(o => o.isInstanceOf[Publication]).map(o => o.asInstanceOf[Publication]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefPublication") + ds.filter(o => o.isInstanceOf[Publication]) + .map(o => o.asInstanceOf[Publication]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/crossrefPublication") - ds.filter(o => o.isInstanceOf[Relation]).map(o => o.asInstanceOf[Relation]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefRelation") + ds.filter(o => o.isInstanceOf[Relation]) + .map(o => o.asInstanceOf[Relation]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/crossrefRelation") - ds.filter(o => o.isInstanceOf[OafDataset]).map(o => o.asInstanceOf[OafDataset]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefDataset") + ds.filter(o => o.isInstanceOf[OafDataset]) + .map(o => o.asInstanceOf[OafDataset]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/crossrefDataset") } - } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala index 191c4587e..3fea9695c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala @@ -16,7 +16,6 @@ object UnpackCrtossrefEntries { val log: Logger = LoggerFactory.getLogger(UnpackCrtossrefEntries.getClass) - def extractDump(input: String): List[String] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) @@ -24,28 +23,36 @@ object UnpackCrtossrefEntries { val a = (json \ "items").extract[JArray] a.arr.map(s => compact(render(s))) - } - def main(args: Array[String]): Unit = { val conf = new SparkConf - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json")).mkString) + val parser = new ArgumentApplicationParser( + Source + .fromInputStream( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json" + ) + ) + .mkString + ) parser.parseArgument(args) val master = parser.get("master") val sourcePath = parser.get("sourcePath") val targetPath = parser.get("targetPath") - val spark: SparkSession = SparkSession.builder().config(conf) + val spark: SparkSession = SparkSession + .builder() + .config(conf) .appName(UnpackCrtossrefEntries.getClass.getSimpleName) .master(master) .getOrCreate() val sc: SparkContext = spark.sparkContext - sc.wholeTextFiles(sourcePath, 6000).flatMap(d => extractDump(d._2)) + sc.wholeTextFiles(sourcePath, 6000) + .flatMap(d => extractDump(d._2)) .saveAsTextFile(targetPath, classOf[GzipCodec]) - } } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala index 0a6fa00f0..18ba864ce 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala @@ -1,6 +1,5 @@ package eu.dnetlib.doiboost.mag - import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty} @@ -14,59 +13,134 @@ import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex - -case class MagPapers(PaperId: Long, Rank: Integer, Doi: String, - DocType: String, PaperTitle: String, OriginalTitle: String, - BookTitle: String, Year: Option[Integer], Date: Option[java.sql.Timestamp], Publisher: String, - JournalId: Option[Long], ConferenceSeriesId: Option[Long], ConferenceInstanceId: Option[Long], - Volume: String, Issue: String, FirstPage: String, LastPage: String, - ReferenceCount: Option[Long], CitationCount: Option[Long], EstimatedCitation: Option[Long], - OriginalVenue: String, FamilyId: Option[Long], CreatedDate: java.sql.Timestamp) {} - +case class MagPapers( + PaperId: Long, + Rank: Integer, + Doi: String, + DocType: String, + PaperTitle: String, + OriginalTitle: String, + BookTitle: String, + Year: Option[Integer], + Date: Option[java.sql.Timestamp], + Publisher: String, + JournalId: Option[Long], + ConferenceSeriesId: Option[Long], + ConferenceInstanceId: Option[Long], + Volume: String, + Issue: String, + FirstPage: String, + LastPage: String, + ReferenceCount: Option[Long], + CitationCount: Option[Long], + EstimatedCitation: Option[Long], + OriginalVenue: String, + FamilyId: Option[Long], + CreatedDate: java.sql.Timestamp +) {} case class MagPaperAbstract(PaperId: Long, IndexedAbstract: String) {} -case class MagAuthor(AuthorId: Long, Rank: Option[Int], NormalizedName: Option[String], DisplayName: Option[String], LastKnownAffiliationId: Option[Long], PaperCount: Option[Long], CitationCount: Option[Long], CreatedDate: Option[java.sql.Timestamp]) {} +case class MagAuthor( + AuthorId: Long, + Rank: Option[Int], + NormalizedName: Option[String], + DisplayName: Option[String], + LastKnownAffiliationId: Option[Long], + PaperCount: Option[Long], + CitationCount: Option[Long], + CreatedDate: Option[java.sql.Timestamp] +) {} -case class MagAffiliation(AffiliationId: Long, Rank: Int, NormalizedName: String, DisplayName: String, GridId: String, OfficialPage: String, WikiPage: String, PaperCount: Long, CitationCount: Long, Latitude: Option[Float], Longitude: Option[Float], CreatedDate: java.sql.Timestamp) {} +case class MagAffiliation( + AffiliationId: Long, + Rank: Int, + NormalizedName: String, + DisplayName: String, + GridId: String, + OfficialPage: String, + WikiPage: String, + PaperCount: Long, + CitationCount: Long, + Latitude: Option[Float], + Longitude: Option[Float], + CreatedDate: java.sql.Timestamp +) {} -case class MagPaperAuthorAffiliation(PaperId: Long, AuthorId: Long, AffiliationId: Option[Long], AuthorSequenceNumber: Int, OriginalAuthor: String, OriginalAffiliation: String) {} +case class MagPaperAuthorAffiliation( + PaperId: Long, + AuthorId: Long, + AffiliationId: Option[Long], + AuthorSequenceNumber: Int, + OriginalAuthor: String, + OriginalAffiliation: String +) {} - -case class MagAuthorAffiliation(author: MagAuthor, affiliation:String, sequenceNumber:Int) +case class MagAuthorAffiliation(author: MagAuthor, affiliation: String, sequenceNumber: Int) case class MagPaperWithAuthorList(PaperId: Long, authors: List[MagAuthorAffiliation]) {} -case class MagPaperAuthorDenormalized(PaperId: Long, author: MagAuthor, affiliation:String, sequenceNumber:Int) {} +case class MagPaperAuthorDenormalized( + PaperId: Long, + author: MagAuthor, + affiliation: String, + sequenceNumber: Int +) {} -case class MagPaperUrl(PaperId: Long, SourceType: Option[Int], SourceUrl: Option[String], LanguageCode: Option[String]) {} +case class MagPaperUrl( + PaperId: Long, + SourceType: Option[Int], + SourceUrl: Option[String], + LanguageCode: Option[String] +) {} -case class MagUrlInstance(SourceUrl:String){} +case class MagUrlInstance(SourceUrl: String) {} case class MagUrl(PaperId: Long, instances: List[MagUrlInstance]) -case class MagSubject(FieldOfStudyId:Long, DisplayName:String, MainType:Option[String], Score:Float){} +case class MagSubject( + FieldOfStudyId: Long, + DisplayName: String, + MainType: Option[String], + Score: Float +) {} -case class MagFieldOfStudy(PaperId:Long, subjects:List[MagSubject]) {} +case class MagFieldOfStudy(PaperId: Long, subjects: List[MagSubject]) {} -case class MagJournal(JournalId: Long, Rank: Option[Int], NormalizedName: Option[String], DisplayName: Option[String], Issn: Option[String], Publisher: Option[String], Webpage: Option[String], PaperCount: Option[Long], CitationCount: Option[Long], CreatedDate: Option[java.sql.Timestamp]) {} +case class MagJournal( + JournalId: Long, + Rank: Option[Int], + NormalizedName: Option[String], + DisplayName: Option[String], + Issn: Option[String], + Publisher: Option[String], + Webpage: Option[String], + PaperCount: Option[Long], + CitationCount: Option[Long], + CreatedDate: Option[java.sql.Timestamp] +) {} - -case class MagConferenceInstance(ci:Long, DisplayName:Option[String], Location:Option[String], StartDate:Option[java.sql.Timestamp], EndDate:Option[java.sql.Timestamp], PaperId:Long){} +case class MagConferenceInstance( + ci: Long, + DisplayName: Option[String], + Location: Option[String], + StartDate: Option[java.sql.Timestamp], + EndDate: Option[java.sql.Timestamp], + PaperId: Long +) {} case object ConversionUtil { - def extractMagIdentifier(pids:mutable.Buffer[String]) :String ={ + def extractMagIdentifier(pids: mutable.Buffer[String]): String = { val magIDRegex: Regex = "^[0-9]+$".r - val s =pids.filter(p=> magIDRegex.findAllIn(p).hasNext) + val s = pids.filter(p => magIDRegex.findAllIn(p).hasNext) if (s.nonEmpty) return s.head null } - - def mergePublication(a: Publication, b:Publication) : Publication = { + def mergePublication(a: Publication, b: Publication): Publication = { if ((a != null) && (b != null)) { a.mergeFrom(b) a @@ -74,10 +148,9 @@ case object ConversionUtil { if (a == null) b else a } - } - def choiceLatestMagArtitcle(p1: MagPapers, p2:MagPapers) :MagPapers = { + def choiceLatestMagArtitcle(p1: MagPapers, p2: MagPapers): MagPapers = { var r = if (p1 == null) p2 else p1 if (p1 != null && p2 != null) { if (p1.CreatedDate != null && p2.CreatedDate != null) { @@ -93,8 +166,9 @@ case object ConversionUtil { } - - def updatePubsWithDescription(inputItem:((String, Publication), MagPaperAbstract)) : Publication = { + def updatePubsWithDescription( + inputItem: ((String, Publication), MagPaperAbstract) + ): Publication = { val pub = inputItem._1._2 val abst = inputItem._2 if (abst != null) { @@ -104,20 +178,22 @@ case object ConversionUtil { } + def updatePubsWithConferenceInfo( + inputItem: ((String, Publication), MagConferenceInstance) + ): Publication = { + val publication: Publication = inputItem._1._2 + val ci: MagConferenceInstance = inputItem._2 - def updatePubsWithConferenceInfo(inputItem:((String, Publication), MagConferenceInstance)) : Publication = { - val publication:Publication= inputItem._1._2 - val ci:MagConferenceInstance = inputItem._2 + if (ci != null) { - if (ci!= null){ - - val j:Journal = new Journal + val j: Journal = new Journal if (ci.Location.isDefined) j.setConferenceplace(ci.Location.get) j.setName(ci.DisplayName.get) - if (ci.StartDate.isDefined && ci.EndDate.isDefined) - { - j.setConferencedate(s"${ci.StartDate.get.toString.substring(0,10)} - ${ci.EndDate.get.toString.substring(0,10)}") + if (ci.StartDate.isDefined && ci.EndDate.isDefined) { + j.setConferencedate( + s"${ci.StartDate.get.toString.substring(0, 10)} - ${ci.EndDate.get.toString.substring(0, 10)}" + ) } publication.setJournal(j) @@ -125,7 +201,7 @@ case object ConversionUtil { publication } - def updatePubsWithSubject(item:((String, Publication), MagFieldOfStudy)) : Publication = { + def updatePubsWithSubject(item: ((String, Publication), MagFieldOfStudy)): Publication = { val publication = item._1._2 val fieldOfStudy = item._2 @@ -135,16 +211,34 @@ case object ConversionUtil { val classid = "MAG" val p: List[StructuredProperty] = fieldOfStudy.subjects.flatMap(s => { - val s1 = createSP(s.DisplayName, classid,className, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES) + val s1 = createSP( + s.DisplayName, + classid, + className, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES + ) val di = DoiBoostMappingUtil.generateDataInfo(s.Score.toString) var resList: List[StructuredProperty] = List(s1) if (s.MainType.isDefined) { val maintp = s.MainType.get - val s2 = createSP(s.MainType.get, classid,className, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES) + val s2 = createSP( + s.MainType.get, + classid, + className, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES + ) s2.setDataInfo(di) resList = resList ::: List(s2) if (maintp.contains(".")) { - val s3 = createSP(maintp.split("\\.").head, classid,className, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES) + val s3 = createSP( + maintp.split("\\.").head, + classid, + className, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES + ) s3.setDataInfo(di) resList = resList ::: List(s3) } @@ -156,25 +250,27 @@ case object ConversionUtil { publication } - - def addInstances(a: (Publication, MagUrl)): Publication = { val pub = a._1 val urls = a._2 - - val i = new Instance + if (urls != null) { - if (urls!= null) { - - val l:List[String] = urls.instances.filter(k=>k.SourceUrl.nonEmpty).map(k=>k.SourceUrl):::List(s"https://academic.microsoft.com/#/detail/${extractMagIdentifier(pub.getOriginalId.asScala)}") + val l: List[String] = urls.instances + .filter(k => k.SourceUrl.nonEmpty) + .map(k => k.SourceUrl) ::: List( + s"https://academic.microsoft.com/#/detail/${extractMagIdentifier(pub.getOriginalId.asScala)}" + ) i.setUrl(l.asJava) - } - else - i.setUrl(List(s"https://academic.microsoft.com/#/detail/${extractMagIdentifier(pub.getOriginalId.asScala)}").asJava) + } else + i.setUrl( + List( + s"https://academic.microsoft.com/#/detail/${extractMagIdentifier(pub.getOriginalId.asScala)}" + ).asJava + ) // Ticket #6281 added pid to Instance i.setPid(pub.getPid) @@ -184,13 +280,13 @@ case object ConversionUtil { pub } - def transformPaperAbstract(input: MagPaperAbstract): MagPaperAbstract = { MagPaperAbstract(input.PaperId, convertInvertedIndexString(input.IndexedAbstract)) } - - def createOAFFromJournalAuthorPaper(inputParams: ((MagPapers, MagJournal), MagPaperWithAuthorList)): Publication = { + def createOAFFromJournalAuthorPaper( + inputParams: ((MagPapers, MagJournal), MagPaperWithAuthorList) + ): Publication = { val paper = inputParams._1._1 val journal = inputParams._1._2 val authors = inputParams._2 @@ -206,31 +302,37 @@ case object ConversionUtil { pub.setId(IdentifierFactory.createDOIBoostIdentifier(pub)) val mainTitles = createSP(paper.PaperTitle, "main title", ModelConstants.DNET_DATACITE_TITLE) - val originalTitles = createSP(paper.OriginalTitle, "alternative title", ModelConstants.DNET_DATACITE_TITLE) + val originalTitles = + createSP(paper.OriginalTitle, "alternative title", ModelConstants.DNET_DATACITE_TITLE) pub.setTitle(List(mainTitles, originalTitles).asJava) pub.setSource(List(asField(paper.BookTitle)).asJava) val authorsOAF = authors.authors.map { f: MagAuthorAffiliation => - val a: eu.dnetlib.dhp.schema.oaf.Author = new eu.dnetlib.dhp.schema.oaf.Author a.setRank(f.sequenceNumber) if (f.author.DisplayName.isDefined) a.setFullname(f.author.DisplayName.get) - if(f.affiliation!= null) + if (f.affiliation != null) a.setAffiliation(List(asField(f.affiliation)).asJava) - a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", ModelConstants.DNET_PID_TYPES)).asJava) + a.setPid( + List( + createSP( + s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", + "URL", + ModelConstants.DNET_PID_TYPES + ) + ).asJava + ) a } pub.setAuthor(authorsOAF.asJava) - if (paper.Date != null && paper.Date.isDefined) { - pub.setDateofacceptance(asField(paper.Date.get.toString.substring(0,10))) + pub.setDateofacceptance(asField(paper.Date.get.toString.substring(0, 10))) } pub.setPublisher(asField(paper.Publisher)) - if (journal != null && journal.DisplayName.isDefined) { val j = new Journal @@ -250,8 +352,9 @@ case object ConversionUtil { pub } - - def createOAF(inputParams: ((MagPapers, MagPaperWithAuthorList), MagPaperAbstract)): Publication = { + def createOAF( + inputParams: ((MagPapers, MagPaperWithAuthorList), MagPaperAbstract) + ): Publication = { val paper = inputParams._1._1 val authors = inputParams._1._2 @@ -268,46 +371,48 @@ case object ConversionUtil { pub.setId(IdentifierFactory.createDOIBoostIdentifier(pub)) val mainTitles = createSP(paper.PaperTitle, "main title", ModelConstants.DNET_DATACITE_TITLE) - val originalTitles = createSP(paper.OriginalTitle, "alternative title", ModelConstants.DNET_DATACITE_TITLE) + val originalTitles = + createSP(paper.OriginalTitle, "alternative title", ModelConstants.DNET_DATACITE_TITLE) pub.setTitle(List(mainTitles, originalTitles).asJava) pub.setSource(List(asField(paper.BookTitle)).asJava) - if (description != null) { pub.setDescription(List(asField(description.IndexedAbstract)).asJava) } - val authorsOAF = authors.authors.map { f: MagAuthorAffiliation => - val a: eu.dnetlib.dhp.schema.oaf.Author = new eu.dnetlib.dhp.schema.oaf.Author a.setFullname(f.author.DisplayName.get) - if(f.affiliation!= null) + if (f.affiliation != null) a.setAffiliation(List(asField(f.affiliation)).asJava) - - a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", ModelConstants.DNET_PID_TYPES)).asJava) + a.setPid( + List( + createSP( + s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", + "URL", + ModelConstants.DNET_PID_TYPES + ) + ).asJava + ) a } - if (paper.Date != null) { - pub.setDateofacceptance(asField(paper.Date.toString.substring(0,10))) + pub.setDateofacceptance(asField(paper.Date.toString.substring(0, 10))) } pub.setAuthor(authorsOAF.asJava) - pub } - def convertInvertedIndexString(json_input: String): String = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(json_input) @@ -317,13 +422,13 @@ case object ConversionUtil { val iid = (json \ "InvertedIndex").extract[Map[String, List[Int]]] - for {(k: String, v: List[Int]) <- iid} { + for { (k: String, v: List[Int]) <- iid } { v.foreach(item => res(item) = k) } - (0 until idl).foreach(i => { - if (res(i) == null) - res(i) = "" - }) + (0 until idl).foreach(i => { + if (res(i) == null) + res(i) = "" + }) return res.mkString(" ") } "" diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala index d25a4893f..75bb98dc8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala @@ -8,44 +8,244 @@ import org.apache.spark.sql.{SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkImportMagIntoDataset { + val datatypedict = Map( - "bool" -> BooleanType, - "int" -> IntegerType, - "uint" -> IntegerType, - "long" -> LongType, - "ulong" -> LongType, - "float" -> FloatType, - "string" -> StringType, + "bool" -> BooleanType, + "int" -> IntegerType, + "uint" -> IntegerType, + "long" -> LongType, + "ulong" -> LongType, + "float" -> FloatType, + "string" -> StringType, "DateTime" -> DateType ) - val stream = Map( - "Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Iso3166Code:string", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), - "AuthorExtendedAttributes" -> Tuple2("mag/AuthorExtendedAttributes.txt", Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string")), - "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), - "ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "EntityRelatedEntities" -> Tuple2("advanced/EntityRelatedEntities.txt", Seq("EntityId:long", "EntityType:string", "RelatedEntityId:long", "RelatedEntityType:string", "RelatedType:int", "Score:float")), - "FieldOfStudyChildren" -> Tuple2("advanced/FieldOfStudyChildren.txt", Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")), - "FieldOfStudyExtendedAttributes" -> Tuple2("advanced/FieldOfStudyExtendedAttributes.txt", Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string")), - "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "PaperAbstractsInvertedIndex" -> Tuple2("nlp/PaperAbstractsInvertedIndex.txt.*", Seq("PaperId:long", "IndexedAbstract:string")), - "PaperAuthorAffiliations" -> Tuple2("mag/PaperAuthorAffiliations.txt", Seq("PaperId:long", "AuthorId:long", "AffiliationId:long?", "AuthorSequenceNumber:uint", "OriginalAuthor:string", "OriginalAffiliation:string")), - "PaperCitationContexts" -> Tuple2("nlp/PaperCitationContexts.txt", Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")), - "PaperExtendedAttributes" -> Tuple2("mag/PaperExtendedAttributes.txt", Seq("PaperId:long", "AttributeType:int", "AttributeValue:string")), - "PaperFieldsOfStudy" -> Tuple2("advanced/PaperFieldsOfStudy.txt", Seq("PaperId:long", "FieldOfStudyId:long", "Score:float")), - "PaperMeSH" -> Tuple2("advanced/PaperMeSH.txt", Seq("PaperId:long", "DescriptorUI:string", "DescriptorName:string", "QualifierUI:string", "QualifierName:string", "IsMajorTopic:bool")), - "PaperRecommendations" -> Tuple2("advanced/PaperRecommendations.txt", Seq("PaperId:long", "RecommendedPaperId:long", "Score:float")), - "PaperReferences" -> Tuple2("mag/PaperReferences.txt", Seq("PaperId:long", "PaperReferenceId:long")), - "PaperResources" -> Tuple2("mag/PaperResources.txt", Seq("PaperId:long", "ResourceType:int", "ResourceUrl:string", "SourceUrl:string", "RelationshipType:int")), - "PaperUrls" -> Tuple2("mag/PaperUrls.txt", Seq("PaperId:long", "SourceType:int?", "SourceUrl:string", "LanguageCode:string")), - "Papers" -> Tuple2("mag/Papers.txt", Seq("PaperId:long", "Rank:uint", "Doi:string", "DocType:string", "PaperTitle:string", "OriginalTitle:string", "BookTitle:string", "Year:int?", "Date:DateTime?", "OnlineDate:DateTime?", "Publisher:string", "JournalId:long?", "ConferenceSeriesId:long?", "ConferenceInstanceId:long?", "Volume:string", "Issue:string", "FirstPage:string", "LastPage:string", "ReferenceCount:long", "CitationCount:long", "EstimatedCitation:long", "OriginalVenue:string", "FamilyId:long?", "FamilyRank:uint?", "CreatedDate:DateTime")), - "RelatedFieldOfStudy" -> Tuple2("advanced/RelatedFieldOfStudy.txt", Seq("FieldOfStudyId1:long", "Type1:string", "FieldOfStudyId2:long", "Type2:string", "Rank:float")) + "Affiliations" -> Tuple2( + "mag/Affiliations.txt", + Seq( + "AffiliationId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "GridId:string", + "OfficialPage:string", + "WikiPage:string", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "Iso3166Code:string", + "Latitude:float?", + "Longitude:float?", + "CreatedDate:DateTime" + ) + ), + "AuthorExtendedAttributes" -> Tuple2( + "mag/AuthorExtendedAttributes.txt", + Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string") + ), + "Authors" -> Tuple2( + "mag/Authors.txt", + Seq( + "AuthorId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "LastKnownAffiliationId:long?", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "CreatedDate:DateTime" + ) + ), + "ConferenceInstances" -> Tuple2( + "mag/ConferenceInstances.txt", + Seq( + "ConferenceInstanceId:long", + "NormalizedName:string", + "DisplayName:string", + "ConferenceSeriesId:long", + "Location:string", + "OfficialUrl:string", + "StartDate:DateTime?", + "EndDate:DateTime?", + "AbstractRegistrationDate:DateTime?", + "SubmissionDeadlineDate:DateTime?", + "NotificationDueDate:DateTime?", + "FinalVersionDueDate:DateTime?", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "Latitude:float?", + "Longitude:float?", + "CreatedDate:DateTime" + ) + ), + "ConferenceSeries" -> Tuple2( + "mag/ConferenceSeries.txt", + Seq( + "ConferenceSeriesId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "CreatedDate:DateTime" + ) + ), + "EntityRelatedEntities" -> Tuple2( + "advanced/EntityRelatedEntities.txt", + Seq( + "EntityId:long", + "EntityType:string", + "RelatedEntityId:long", + "RelatedEntityType:string", + "RelatedType:int", + "Score:float" + ) + ), + "FieldOfStudyChildren" -> Tuple2( + "advanced/FieldOfStudyChildren.txt", + Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long") + ), + "FieldOfStudyExtendedAttributes" -> Tuple2( + "advanced/FieldOfStudyExtendedAttributes.txt", + Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string") + ), + "FieldsOfStudy" -> Tuple2( + "advanced/FieldsOfStudy.txt", + Seq( + "FieldOfStudyId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "MainType:string", + "Level:int", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "CreatedDate:DateTime" + ) + ), + "Journals" -> Tuple2( + "mag/Journals.txt", + Seq( + "JournalId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "Issn:string", + "Publisher:string", + "Webpage:string", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "CreatedDate:DateTime" + ) + ), + "PaperAbstractsInvertedIndex" -> Tuple2( + "nlp/PaperAbstractsInvertedIndex.txt.*", + Seq("PaperId:long", "IndexedAbstract:string") + ), + "PaperAuthorAffiliations" -> Tuple2( + "mag/PaperAuthorAffiliations.txt", + Seq( + "PaperId:long", + "AuthorId:long", + "AffiliationId:long?", + "AuthorSequenceNumber:uint", + "OriginalAuthor:string", + "OriginalAffiliation:string" + ) + ), + "PaperCitationContexts" -> Tuple2( + "nlp/PaperCitationContexts.txt", + Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string") + ), + "PaperExtendedAttributes" -> Tuple2( + "mag/PaperExtendedAttributes.txt", + Seq("PaperId:long", "AttributeType:int", "AttributeValue:string") + ), + "PaperFieldsOfStudy" -> Tuple2( + "advanced/PaperFieldsOfStudy.txt", + Seq("PaperId:long", "FieldOfStudyId:long", "Score:float") + ), + "PaperMeSH" -> Tuple2( + "advanced/PaperMeSH.txt", + Seq( + "PaperId:long", + "DescriptorUI:string", + "DescriptorName:string", + "QualifierUI:string", + "QualifierName:string", + "IsMajorTopic:bool" + ) + ), + "PaperRecommendations" -> Tuple2( + "advanced/PaperRecommendations.txt", + Seq("PaperId:long", "RecommendedPaperId:long", "Score:float") + ), + "PaperReferences" -> Tuple2( + "mag/PaperReferences.txt", + Seq("PaperId:long", "PaperReferenceId:long") + ), + "PaperResources" -> Tuple2( + "mag/PaperResources.txt", + Seq( + "PaperId:long", + "ResourceType:int", + "ResourceUrl:string", + "SourceUrl:string", + "RelationshipType:int" + ) + ), + "PaperUrls" -> Tuple2( + "mag/PaperUrls.txt", + Seq("PaperId:long", "SourceType:int?", "SourceUrl:string", "LanguageCode:string") + ), + "Papers" -> Tuple2( + "mag/Papers.txt", + Seq( + "PaperId:long", + "Rank:uint", + "Doi:string", + "DocType:string", + "PaperTitle:string", + "OriginalTitle:string", + "BookTitle:string", + "Year:int?", + "Date:DateTime?", + "OnlineDate:DateTime?", + "Publisher:string", + "JournalId:long?", + "ConferenceSeriesId:long?", + "ConferenceInstanceId:long?", + "Volume:string", + "Issue:string", + "FirstPage:string", + "LastPage:string", + "ReferenceCount:long", + "CitationCount:long", + "EstimatedCitation:long", + "OriginalVenue:string", + "FamilyId:long?", + "FamilyRank:uint?", + "CreatedDate:DateTime" + ) + ), + "RelatedFieldOfStudy" -> Tuple2( + "advanced/RelatedFieldOfStudy.txt", + Seq( + "FieldOfStudyId1:long", + "Type1:string", + "FieldOfStudyId2:long", + "Type2:string", + "Rank:float" + ) + ) ) - def getSchema(streamName: String): StructType = { var schema = new StructType() val d: Seq[String] = stream(streamName)._2 @@ -61,19 +261,22 @@ object SparkImportMagIntoDataset { schema } - def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/mag/convert_mag_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/mag/convert_mag_to_oaf_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() stream.foreach { case (k, v) => val s: StructType = getSchema(k) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index 41e95baa1..5c960c15b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -9,21 +9,42 @@ import org.apache.spark.sql.functions.{col, collect_list, struct} import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ + object SparkProcessMAG { def getDistinctResults(d: Dataset[MagPapers]): Dataset[MagPapers] = { d.where(col("Doi").isNotNull) .groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING) - .reduceGroups((p1: MagPapers, p2: MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1, p2)) + .reduceGroups((p1: MagPapers, p2: MagPapers) => + ConversionUtil.choiceLatestMagArtitcle(p1, p2) + ) .map(_._2)(Encoders.product[MagPapers]) .map(mp => { - MagPapers(mp.PaperId, mp.Rank, DoiBoostMappingUtil.normalizeDoi(mp.Doi), - mp.DocType, mp.PaperTitle, mp.OriginalTitle, - mp.BookTitle, mp.Year, mp.Date, mp.Publisher: String, - mp.JournalId, mp.ConferenceSeriesId, mp.ConferenceInstanceId, - mp.Volume, mp.Issue, mp.FirstPage, mp.LastPage, - mp.ReferenceCount, mp.CitationCount, mp.EstimatedCitation, - mp.OriginalVenue, mp.FamilyId, mp.CreatedDate) + MagPapers( + mp.PaperId, + mp.Rank, + DoiBoostMappingUtil.normalizeDoi(mp.Doi), + mp.DocType, + mp.PaperTitle, + mp.OriginalTitle, + mp.BookTitle, + mp.Year, + mp.Date, + mp.Publisher: String, + mp.JournalId, + mp.ConferenceSeriesId, + mp.ConferenceInstanceId, + mp.Volume, + mp.Issue, + mp.FirstPage, + mp.LastPage, + mp.ReferenceCount, + mp.CitationCount, + mp.EstimatedCitation, + mp.OriginalVenue, + mp.FamilyId, + mp.CreatedDate + ) })(Encoders.product[MagPapers]) } @@ -31,22 +52,29 @@ object SparkProcessMAG { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/mag/preprocess_mag_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/mag/preprocess_mag_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val sourcePath = parser.get("sourcePath") val workingPath = parser.get("workingPath") val targetPath = parser.get("targetPath") import spark.implicits._ - implicit val mapEncoderPubs: Encoder[Publication] = org.apache.spark.sql.Encoders.kryo[Publication] - implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs) + implicit val mapEncoderPubs: Encoder[Publication] = + org.apache.spark.sql.Encoders.kryo[Publication] + implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = + Encoders.tuple(Encoders.STRING, mapEncoderPubs) logger.info("Phase 1) make uninue DOI in Papers:") val d: Dataset[MagPapers] = spark.read.load(s"$sourcePath/Papers").as[MagPapers] @@ -58,16 +86,23 @@ object SparkProcessMAG { logger.info("Phase 0) Enrich Publication with description") val pa = spark.read.load(s"$sourcePath/PaperAbstractsInvertedIndex").as[MagPaperAbstract] - pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"$workingPath/PaperAbstract") + pa.map(ConversionUtil.transformPaperAbstract) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/PaperAbstract") logger.info("Phase 3) Group Author by PaperId") val authors = spark.read.load(s"$sourcePath/Authors").as[MagAuthor] val affiliation = spark.read.load(s"$sourcePath/Affiliations").as[MagAffiliation] - val paperAuthorAffiliation = spark.read.load(s"$sourcePath/PaperAuthorAffiliations").as[MagPaperAuthorAffiliation] + val paperAuthorAffiliation = + spark.read.load(s"$sourcePath/PaperAuthorAffiliations").as[MagPaperAuthorAffiliation] - paperAuthorAffiliation.joinWith(authors, paperAuthorAffiliation("AuthorId").equalTo(authors("AuthorId"))) - .map { case (a: MagPaperAuthorAffiliation, b: MagAuthor) => (a.AffiliationId, MagPaperAuthorDenormalized(a.PaperId, b, null, a.AuthorSequenceNumber)) } + paperAuthorAffiliation + .joinWith(authors, paperAuthorAffiliation("AuthorId").equalTo(authors("AuthorId"))) + .map { case (a: MagPaperAuthorAffiliation, b: MagAuthor) => + (a.AffiliationId, MagPaperAuthorDenormalized(a.PaperId, b, null, a.AuthorSequenceNumber)) + } .joinWith(affiliation, affiliation("AffiliationId").equalTo(col("_1")), "left") .map(s => { val mpa = s._1._2 @@ -76,79 +111,133 @@ object SparkProcessMAG { MagPaperAuthorDenormalized(mpa.PaperId, mpa.author, af.DisplayName, mpa.sequenceNumber) } else mpa - }).groupBy("PaperId").agg(collect_list(struct($"author", $"affiliation", $"sequenceNumber")).as("authors")) - .write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_1_paper_authors") + }) + .groupBy("PaperId") + .agg(collect_list(struct($"author", $"affiliation", $"sequenceNumber")).as("authors")) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/merge_step_1_paper_authors") - logger.info("Phase 4) create First Version of publication Entity with Paper Journal and Authors") + logger.info( + "Phase 4) create First Version of publication Entity with Paper Journal and Authors" + ) val journals = spark.read.load(s"$sourcePath/Journals").as[MagJournal] - val papers = spark.read.load((s"$workingPath/Papers_distinct")).as[MagPapers] + val papers = spark.read.load(s"$workingPath/Papers_distinct").as[MagPapers] - val paperWithAuthors = spark.read.load(s"$workingPath/merge_step_1_paper_authors").as[MagPaperWithAuthorList] + val paperWithAuthors = + spark.read.load(s"$workingPath/merge_step_1_paper_authors").as[MagPaperWithAuthorList] - val firstJoin = papers.joinWith(journals, papers("JournalId").equalTo(journals("JournalId")), "left") - firstJoin.joinWith(paperWithAuthors, firstJoin("_1.PaperId").equalTo(paperWithAuthors("PaperId")), "left") + val firstJoin = + papers.joinWith(journals, papers("JournalId").equalTo(journals("JournalId")), "left") + firstJoin + .joinWith( + paperWithAuthors, + firstJoin("_1.PaperId").equalTo(paperWithAuthors("PaperId")), + "left" + ) .map { a => ConversionUtil.createOAFFromJournalAuthorPaper(a) } - .write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_2") - + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/merge_step_2") var magPubs: Dataset[(String, Publication)] = - spark.read.load(s"$workingPath/merge_step_2").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] + spark.read + .load(s"$workingPath/merge_step_2") + .as[Publication] + .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)) + .as[(String, Publication)] + val conference = spark.read + .load(s"$sourcePath/ConferenceInstances") + .select( + $"ConferenceInstanceId".as("ci"), + $"DisplayName", + $"Location", + $"StartDate", + $"EndDate" + ) + val conferenceInstance = conference + .joinWith(papers, papers("ConferenceInstanceId").equalTo(conference("ci"))) + .select( + $"_1.ci", + $"_1.DisplayName", + $"_1.Location", + $"_1.StartDate", + $"_1.EndDate", + $"_2.PaperId" + ) + .as[MagConferenceInstance] - val conference = spark.read.load(s"$sourcePath/ConferenceInstances") - .select($"ConferenceInstanceId".as("ci"), $"DisplayName", $"Location", $"StartDate", $"EndDate") - val conferenceInstance = conference.joinWith(papers, papers("ConferenceInstanceId").equalTo(conference("ci"))) - .select($"_1.ci", $"_1.DisplayName", $"_1.Location", $"_1.StartDate", $"_1.EndDate", $"_2.PaperId").as[MagConferenceInstance] - - - magPubs.joinWith(conferenceInstance, col("_1").equalTo(conferenceInstance("PaperId")), "left") + magPubs + .joinWith(conferenceInstance, col("_1").equalTo(conferenceInstance("PaperId")), "left") .map(item => ConversionUtil.updatePubsWithConferenceInfo(item)) .write .mode(SaveMode.Overwrite) .save(s"$workingPath/merge_step_3") + val paperAbstract = spark.read.load(s"$workingPath/PaperAbstract").as[MagPaperAbstract] - val paperAbstract = spark.read.load((s"$workingPath/PaperAbstract")).as[MagPaperAbstract] - - - magPubs = spark.read.load(s"$workingPath/merge_step_3").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] - - magPubs.joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left") - .map(item => ConversionUtil.updatePubsWithDescription(item) - ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4") + magPubs = spark.read + .load(s"$workingPath/merge_step_3") + .as[Publication] + .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)) + .as[(String, Publication)] + magPubs + .joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left") + .map(item => ConversionUtil.updatePubsWithDescription(item)) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/merge_step_4") logger.info("Phase 7) Enrich Publication with FieldOfStudy") - magPubs = spark.read.load(s"$workingPath/merge_step_4").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] + magPubs = spark.read + .load(s"$workingPath/merge_step_4") + .as[Publication] + .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)) + .as[(String, Publication)] - val fos = spark.read.load(s"$sourcePath/FieldsOfStudy").select($"FieldOfStudyId".alias("fos"), $"DisplayName", $"MainType") + val fos = spark.read + .load(s"$sourcePath/FieldsOfStudy") + .select($"FieldOfStudyId".alias("fos"), $"DisplayName", $"MainType") val pfos = spark.read.load(s"$sourcePath/PaperFieldsOfStudy") - val paperField = pfos.joinWith(fos, fos("fos").equalTo(pfos("FieldOfStudyId"))) + val paperField = pfos + .joinWith(fos, fos("fos").equalTo(pfos("FieldOfStudyId"))) .select($"_1.FieldOfStudyId", $"_2.DisplayName", $"_2.MainType", $"_1.PaperId", $"_1.Score") - .groupBy($"PaperId").agg(collect_list(struct($"FieldOfStudyId", $"DisplayName", $"MainType", $"Score")).as("subjects")) + .groupBy($"PaperId") + .agg( + collect_list(struct($"FieldOfStudyId", $"DisplayName", $"MainType", $"Score")) + .as("subjects") + ) .as[MagFieldOfStudy] - magPubs.joinWith(paperField, col("_1") - .equalTo(paperField("PaperId")), "left") + magPubs + .joinWith( + paperField, + col("_1") + .equalTo(paperField("PaperId")), + "left" + ) .map(item => ConversionUtil.updatePubsWithSubject(item)) - .write.mode(SaveMode.Overwrite) + .write + .mode(SaveMode.Overwrite) .save(s"$workingPath/mag_publication") - spark.read.load(s"$workingPath/mag_publication").as[Publication] + spark.read + .load(s"$workingPath/mag_publication") + .as[Publication] .filter(p => p.getId != null) .groupByKey(p => p.getId) .reduceGroups((a: Publication, b: Publication) => ConversionUtil.mergePublication(a, b)) .map(_._2) - .write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") - + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/magPublication") } } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala index 11031f9ca..7c58afc09 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala @@ -15,15 +15,20 @@ import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ +case class ORCIDItem(doi: String, authors: List[OrcidAuthor]) {} -case class ORCIDItem(doi:String, authors:List[OrcidAuthor]){} -case class OrcidAuthor(oid:String, name:Option[String], surname:Option[String], creditName:Option[String], otherNames:Option[List[String]], errorCode:Option[String]){} -case class OrcidWork(oid:String, doi:String) +case class OrcidAuthor( + oid: String, + name: Option[String], + surname: Option[String], + creditName: Option[String], + otherNames: Option[List[String]], + errorCode: Option[String] +) {} +case class OrcidWork(oid: String, doi: String) +case class ORCIDElement(doi: String, authors: List[ORCIDItem]) {} - - -case class ORCIDElement(doi:String, authors:List[ORCIDItem]) {} object ORCIDToOAF { val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass) val mapper = new ObjectMapper() @@ -41,7 +46,7 @@ object ORCIDToOAF { def extractValueFromInputString(input: String): (String, String) = { val i = input.indexOf('[') - if (i <5) { + if (i < 5) { return null } val orcidList = input.substring(i, input.length - 1) @@ -51,17 +56,16 @@ object ORCIDToOAF { } else null } - - def strValid(s:Option[String]) : Boolean = { + def strValid(s: Option[String]): Boolean = { s.isDefined && s.get.nonEmpty } - def authorValid(author:OrcidAuthor): Boolean ={ + def authorValid(author: OrcidAuthor): Boolean = { if (strValid(author.name) && strValid(author.surname)) { return true } if (strValid(author.surname)) { - return true + return true } if (strValid(author.creditName)) { return true @@ -70,37 +74,35 @@ object ORCIDToOAF { false } - - def extractDOIWorks(input:String): List[OrcidWork] = { + def extractDOIWorks(input: String): List[OrcidWork] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val oid = (json \ "workDetail" \"oid").extractOrElse[String](null) + val oid = (json \ "workDetail" \ "oid").extractOrElse[String](null) if (oid == null) return List() - val doi:List[(String, String)] = for { - JObject(extIds) <- json \ "workDetail" \"extIds" + val doi: List[(String, String)] = for { + JObject(extIds) <- json \ "workDetail" \ "extIds" JField("type", JString(typeValue)) <- extIds - JField("value", JString(value)) <- extIds + JField("value", JString(value)) <- extIds if "doi".equalsIgnoreCase(typeValue) } yield (typeValue, DoiBoostMappingUtil.normalizeDoi(value)) if (doi.nonEmpty) { - return doi.map(l =>OrcidWork(oid, l._2)) + return doi.map(l => OrcidWork(oid, l._2)) } List() } - def convertORCIDAuthor(input:String): OrcidAuthor = { + def convertORCIDAuthor(input: String): OrcidAuthor = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - (json \"authorData" ).extractOrElse[OrcidAuthor](null) - } + (json \ "authorData").extractOrElse[OrcidAuthor](null) + } - - def convertTOOAF(input:ORCIDItem) :Publication = { + def convertTOOAF(input: ORCIDItem): Publication = { val doi = input.doi - val pub:Publication = new Publication + val pub: Publication = new Publication pub.setPid(List(createSP(doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava) pub.setDataInfo(generateDataInfo()) @@ -108,9 +110,9 @@ object ORCIDToOAF { if (pub.getId == null) return null - try{ + try { - val l:List[Author]= input.authors.map(a=> { + val l: List[Author] = input.authors.map(a => { generateAuthor(a) })(collection.breakOut) @@ -125,30 +127,38 @@ object ORCIDToOAF { } } - def generateOricPIDDatainfo():DataInfo = { - val di =DoiBoostMappingUtil.generateDataInfo("0.91") + def generateOricPIDDatainfo(): DataInfo = { + val di = DoiBoostMappingUtil.generateDataInfo("0.91") di.getProvenanceaction.setClassid(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY) di.getProvenanceaction.setClassname(ModelConstants.HARVESTED) di } - def generateAuthor(o : OrcidAuthor): Author = { + def generateAuthor(o: OrcidAuthor): Author = { val a = new Author if (strValid(o.name)) { - a.setName(o.name.get.capitalize) + a.setName(o.name.get.capitalize) } if (strValid(o.surname)) { a.setSurname(o.surname.get.capitalize) } - if(strValid(o.name) && strValid(o.surname)) + if (strValid(o.name) && strValid(o.surname)) a.setFullname(s"${o.name.get.capitalize} ${o.surname.get.capitalize}") else if (strValid(o.creditName)) a.setFullname(o.creditName.get) if (StringUtils.isNotBlank(o.oid)) - a.setPid(List(createSP(o.oid, ModelConstants.ORCID, ModelConstants.DNET_PID_TYPES, generateOricPIDDatainfo())).asJava) + a.setPid( + List( + createSP( + o.oid, + ModelConstants.ORCID, + ModelConstants.DNET_PID_TYPES, + generateOricPIDDatainfo() + ) + ).asJava + ) a } - -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala index 1b189e296..95a1f5a19 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala @@ -10,11 +10,11 @@ import org.slf4j.{Logger, LoggerFactory} object SparkConvertORCIDToOAF { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - def run(spark: SparkSession, workingPath: String, targetPath: String): Unit = { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] import spark.implicits._ - val dataset: Dataset[ORCIDItem] = spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] + val dataset: Dataset[ORCIDItem] = + spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] logger.info("Converting ORCID to OAF") dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) @@ -22,15 +22,21 @@ object SparkConvertORCIDToOAF { def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + SparkConvertORCIDToOAF.getClass.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() val workingPath = parser.get("workingPath") val targetPath = parser.get("targetPath") diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala index 153be5dd1..7b6408417 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala @@ -17,45 +17,72 @@ object SparkPreprocessORCID { } - def run(spark: SparkSession, sourcePath: String, workingPath: String): Unit = { import spark.implicits._ implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - val inputRDD: RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s != null).filter(s => ORCIDToOAF.authorValid(s)) + val inputRDD: RDD[OrcidAuthor] = spark.sparkContext + .textFile(s"$sourcePath/authors") + .map(s => ORCIDToOAF.convertORCIDAuthor(s)) + .filter(s => s != null) + .filter(s => ORCIDToOAF.authorValid(s)) - spark.createDataset(inputRDD).as[OrcidAuthor].write.mode(SaveMode.Overwrite).save(s"$workingPath/author") + spark + .createDataset(inputRDD) + .as[OrcidAuthor] + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/author") - val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s != null) + val res = spark.sparkContext + .textFile(s"$sourcePath/works") + .flatMap(s => ORCIDToOAF.extractDOIWorks(s)) + .filter(s => s != null) - spark.createDataset(res).as[OrcidWork].write.mode(SaveMode.Overwrite).save(s"$workingPath/works") + spark + .createDataset(res) + .as[OrcidWork] + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/works") val authors: Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] val works: Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] - works.joinWith(authors, authors("oid").equalTo(works("oid"))) + works + .joinWith(authors, authors("oid").equalTo(works("oid"))) .map(i => { val doi = i._1.doi val author = i._2 (doi, author) - }).groupBy(col("_1").alias("doi")) - .agg(collect_list(col("_2")).alias("authors")).as[ORCIDItem] + }) + .groupBy(col("_1").alias("doi")) + .agg(collect_list(col("_2")).alias("authors")) + .as[ORCIDItem] .map(s => fixORCIDItem(s)) - .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor") + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/orcidworksWithAuthor") } def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + SparkConvertORCIDToOAF.getClass.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() val sourcePath = parser.get("sourcePath") val workingPath = parser.get("workingPath") diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala index 70290018d..9f7f9d18f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala @@ -13,28 +13,35 @@ object SparkMapUnpayWallToOAF { def main(args: Array[String]): Unit = { - val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_uw_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + SparkMapDumpIntoOAF.getClass.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/convert_uw_to_oaf_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - val sourcePath = parser.get("sourcePath") val targetPath = parser.get("targetPath") val inputRDD: RDD[String] = spark.sparkContext.textFile(s"$sourcePath") logger.info("Converting UnpayWall to OAF") - val d: Dataset[Publication] = spark.createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p => p != null)).as[Publication] + val d: Dataset[Publication] = spark + .createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p => p != null)) + .as[Publication] d.write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala index bf5694965..bbdc80b1d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala @@ -12,33 +12,41 @@ import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ - - -case class OALocation(evidence:Option[String], host_type:Option[String], is_best:Option[Boolean], license: Option[String], pmh_id:Option[String], updated:Option[String], - url:Option[String], url_for_landing_page:Option[String], url_for_pdf:Option[String], version:Option[String]) {} - - - +case class OALocation( + evidence: Option[String], + host_type: Option[String], + is_best: Option[Boolean], + license: Option[String], + pmh_id: Option[String], + updated: Option[String], + url: Option[String], + url_for_landing_page: Option[String], + url_for_pdf: Option[String], + version: Option[String] +) {} object UnpayWallToOAF { val logger: Logger = LoggerFactory.getLogger(getClass) - - def get_unpaywall_color(input:String):Option[OpenAccessRoute] = { - if(input == null || input.equalsIgnoreCase("close")) + def get_unpaywall_color(input: String): Option[OpenAccessRoute] = { + if (input == null || input.equalsIgnoreCase("close")) return None - if(input.equalsIgnoreCase("green")) + if (input.equalsIgnoreCase("green")) return Some(OpenAccessRoute.green) - if(input.equalsIgnoreCase("bronze")) + if (input.equalsIgnoreCase("bronze")) return Some(OpenAccessRoute.bronze) - if(input.equalsIgnoreCase("hybrid")) + if (input.equalsIgnoreCase("hybrid")) return Some(OpenAccessRoute.hybrid) else return Some(OpenAccessRoute.gold) } - def get_color(is_oa:Boolean, location: OALocation, journal_is_oa:Boolean):Option[OpenAccessRoute] = { + def get_color( + is_oa: Boolean, + location: OALocation, + journal_is_oa: Boolean + ): Option[OpenAccessRoute] = { if (is_oa) { if (location.host_type.isDefined) { { @@ -62,23 +70,22 @@ object UnpayWallToOAF { None } - - def convertToOAF(input:String):Publication = { + def convertToOAF(input: String): Publication = { val pub = new Publication implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val doi = DoiBoostMappingUtil.normalizeDoi((json \"doi").extract[String]) + val doi = DoiBoostMappingUtil.normalizeDoi((json \ "doi").extract[String]) - if(doi == null) + if (doi == null) return null - val is_oa = (json\ "is_oa").extract[Boolean] + val is_oa = (json \ "is_oa").extract[Boolean] - val journal_is_oa= (json\ "journal_is_oa").extract[Boolean] + val journal_is_oa = (json \ "journal_is_oa").extract[Boolean] - val oaLocation:OALocation = (json \ "best_oa_location").extractOrElse[OALocation](null) + val oaLocation: OALocation = (json \ "best_oa_location").extractOrElse[OALocation](null) val colour = get_unpaywall_color((json \ "oa_status").extractOrElse[String](null)) @@ -88,9 +95,9 @@ object UnpayWallToOAF { if (!is_oa) return null - if(oaLocation== null || oaLocation.url.isEmpty) - return null - val i :Instance= new Instance() + if (oaLocation == null || oaLocation.url.isEmpty) + return null + val i: Instance = new Instance() i.setCollectedfrom(createUnpayWallCollectedFrom()) // i.setAccessright(getOpenAccessQualifier()) @@ -122,7 +129,4 @@ object UnpayWallToOAF { } - - - } diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala index 41730ade0..61d2eef29 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala @@ -6,15 +6,11 @@ import org.junit.jupiter.api.Test class DoiBoostHostedByMapTest { @Test - def idDSGeneration():Unit = { - val s ="doajarticles::0066-782X" - - + def idDSGeneration(): Unit = { + val s = "doajarticles::0066-782X" println(DoiBoostMappingUtil.generateDSId(s)) - } - } diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala index a9a841ee9..391d45b10 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala @@ -6,41 +6,39 @@ import org.junit.jupiter.api.Test class NormalizeDOITest { @Test - def doiDSLowerCase():Unit = { - val doi ="10.1042/BCJ20160876" + def doiDSLowerCase(): Unit = { + val doi = "10.1042/BCJ20160876" assert(DoiBoostMappingUtil.normalizeDoi(doi).equals(doi.toLowerCase())) } - @Test - def doiFiltered():Unit = { + def doiFiltered(): Unit = { val doi = "0.1042/BCJ20160876" assert(DoiBoostMappingUtil.normalizeDoi(doi) == null) } @Test - def doiFiltered2():Unit = { + def doiFiltered2(): Unit = { val doi = "https://doi.org/0.1042/BCJ20160876" assert(DoiBoostMappingUtil.normalizeDoi(doi) == null) } - @Test - def doiCleaned():Unit = { + def doiCleaned(): Unit = { val doi = "https://doi.org/10.1042/BCJ20160876" assert(DoiBoostMappingUtil.normalizeDoi(doi).equals("10.1042/BCJ20160876".toLowerCase())) } @Test - def doiCleaned1():Unit = { + def doiCleaned1(): Unit = { val doi = "https://doi.org/10.1042/ BCJ20160876" assert(DoiBoostMappingUtil.normalizeDoi(doi).equals("10.1042/BCJ20160876".toLowerCase())) } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index 71dbf27be..ad3d4b3d4 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -12,20 +12,24 @@ import scala.collection.JavaConverters._ import scala.io.Source import scala.util.matching.Regex - class CrossrefMappingTest { val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass) val mapper = new ObjectMapper() - - @Test def testFunderRelationshipsMapping(): Unit = { - val template = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article_funder_template.json")).mkString - val funder_doi = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/funder_doi")).mkString - val funder_name = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/funder_doi")).mkString - + val template = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article_funder_template.json") + ) + .mkString + val funder_doi = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/funder_doi")) + .mkString + val funder_name = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/funder_doi")) + .mkString for (line <- funder_doi.lines) { val json = template.replace("%s", line) @@ -43,7 +47,8 @@ class CrossrefMappingTest { def checkRelation(generatedOAF: List[Oaf]): Unit = { - val rels: List[Relation] = generatedOAF.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]] + val rels: List[Relation] = + generatedOAF.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]] assertFalse(rels.isEmpty) rels.foreach(relation => { val relJson = mapper.writeValueAsString(relation) @@ -59,22 +64,22 @@ class CrossrefMappingTest { } - @Test - def testSum() :Unit = { - val from:Long = 1613135645000L - val delta:Long = 1000000L - - - println(s"updating from value: $from -> ${from+delta}") + def testSum(): Unit = { + val from: Long = 1613135645000L + val delta: Long = 1000000L + println(s"updating from value: $from -> ${from + delta}") } @Test - def testOrcidID() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/orcid_data.json")).mkString - + def testOrcidID(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/orcid_data.json") + ) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -85,17 +90,18 @@ class CrossrefMappingTest { val items = resultList.filter(p => p.isInstanceOf[Result]) - mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) items.foreach(p => println(mapper.writeValueAsString(p))) - } @Test - def testEmptyTitle() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/empty_title.json")).mkString - + def testEmptyTitle(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/empty_title.json") + ) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -106,17 +112,16 @@ class CrossrefMappingTest { val items = resultList.filter(p => p.isInstanceOf[Result]) - mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) items.foreach(p => println(mapper.writeValueAsString(p))) - } - @Test def testPeerReviewed(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/prwTest.json")).mkString + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/prwTest.json")) + .mkString mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) assertNotNull(json) @@ -128,12 +133,8 @@ class CrossrefMappingTest { val items = resultList.filter(p => p.isInstanceOf[Result]) - items.foreach(p => logger.info(mapper.writeValueAsString(p))) - - - } def extractECAward(award: String): String = { @@ -143,21 +144,21 @@ class CrossrefMappingTest { null } - @Test def extractECTest(): Unit = { - val s = "FP7/2007-2013" + val s = "FP7/2007-2013" val awardExtracted = extractECAward(s) println(awardExtracted) println(DHPUtils.md5(awardExtracted)) - } @Test def testJournalRelation(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/awardTest.json")).mkString + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/awardTest.json")) + .mkString assertNotNull(json) assertFalse(json.isEmpty) @@ -165,20 +166,19 @@ class CrossrefMappingTest { val resultList: List[Oaf] = Crossref2Oaf.convert(json) assertTrue(resultList.nonEmpty) - val rels:List[Relation] = resultList.filter(p => p.isInstanceOf[Relation]).map(r=> r.asInstanceOf[Relation]) - - + val rels: List[Relation] = + resultList.filter(p => p.isInstanceOf[Relation]).map(r => r.asInstanceOf[Relation]) rels.foreach(s => logger.info(s.getTarget)) - assertEquals(rels.size, 6 ) - + assertEquals(rels.size, 6) } - @Test def testConvertBookFromCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/book.json")).mkString + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/book.json")) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -199,42 +199,64 @@ class CrossrefMappingTest { assertNotNull(result.getDataInfo, "Datainfo test not null Failed"); assertNotNull( result.getDataInfo.getProvenanceaction, - "DataInfo/Provenance test not null Failed"); + "DataInfo/Provenance test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassid.isEmpty, - "DataInfo/Provenance/classId test not null Failed"); + "DataInfo/Provenance/classId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassname.isEmpty, - "DataInfo/Provenance/className test not null Failed"); + "DataInfo/Provenance/className test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty, - "DataInfo/Provenance/SchemeId test not null Failed"); + "DataInfo/Provenance/SchemeId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemename.isEmpty, - "DataInfo/Provenance/SchemeName test not null Failed"); + "DataInfo/Provenance/SchemeName test not null Failed" + ); assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed"); assertFalse(result.getCollectedfrom.isEmpty); val collectedFromList = result.getCollectedfrom.asScala - assert(collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion") - - assert(collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), "Wrong collected from assertion") + assert( + collectedFromList.exists(c => + c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2") + ), + "Wrong collected from assertion" + ) + assert( + collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), + "Wrong collected from assertion" + ) val relevantDates = result.getRelevantdate.asScala - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), "Missing relevant date of type created") - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-online")), "Missing relevant date of type published-online") - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-print")), "Missing relevant date of type published-print") + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), + "Missing relevant date of type created" + ) + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-online")), + "Missing relevant date of type published-online" + ) + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-print")), + "Missing relevant date of type published-print" + ) val rels = resultList.filter(p => p.isInstanceOf[Relation]) assert(rels.isEmpty) } - @Test def testConvertPreprintFromCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/preprint.json")).mkString + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/preprint.json")) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -255,44 +277,72 @@ class CrossrefMappingTest { assertNotNull(result.getDataInfo, "Datainfo test not null Failed"); assertNotNull( result.getDataInfo.getProvenanceaction, - "DataInfo/Provenance test not null Failed"); + "DataInfo/Provenance test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassid.isEmpty, - "DataInfo/Provenance/classId test not null Failed"); + "DataInfo/Provenance/classId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassname.isEmpty, - "DataInfo/Provenance/className test not null Failed"); + "DataInfo/Provenance/className test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty, - "DataInfo/Provenance/SchemeId test not null Failed"); + "DataInfo/Provenance/SchemeId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemename.isEmpty, - "DataInfo/Provenance/SchemeName test not null Failed"); + "DataInfo/Provenance/SchemeName test not null Failed" + ); assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed"); assertFalse(result.getCollectedfrom.isEmpty); val collectedFromList = result.getCollectedfrom.asScala - assert(collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion") - - assert(collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), "Wrong collected from assertion") + assert( + collectedFromList.exists(c => + c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2") + ), + "Wrong collected from assertion" + ) + assert( + collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), + "Wrong collected from assertion" + ) val relevantDates = result.getRelevantdate.asScala - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), "Missing relevant date of type created") - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("available")), "Missing relevant date of type available") - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("accepted")), "Missing relevant date of type accepted") - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-online")), "Missing relevant date of type published-online") - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-print")), "Missing relevant date of type published-print") + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), + "Missing relevant date of type created" + ) + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("available")), + "Missing relevant date of type available" + ) + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("accepted")), + "Missing relevant date of type accepted" + ) + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-online")), + "Missing relevant date of type published-online" + ) + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-print")), + "Missing relevant date of type published-print" + ) val rels = resultList.filter(p => p.isInstanceOf[Relation]) assert(rels.isEmpty) } - @Test def testConvertDatasetFromCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/dataset.json")).mkString + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/dataset.json")) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -313,19 +363,24 @@ class CrossrefMappingTest { assertNotNull(result.getDataInfo, "Datainfo test not null Failed"); assertNotNull( result.getDataInfo.getProvenanceaction, - "DataInfo/Provenance test not null Failed"); + "DataInfo/Provenance test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassid.isEmpty, - "DataInfo/Provenance/classId test not null Failed"); + "DataInfo/Provenance/classId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassname.isEmpty, - "DataInfo/Provenance/className test not null Failed"); + "DataInfo/Provenance/className test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty, - "DataInfo/Provenance/SchemeId test not null Failed"); + "DataInfo/Provenance/SchemeId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemename.isEmpty, - "DataInfo/Provenance/SchemeName test not null Failed"); + "DataInfo/Provenance/SchemeName test not null Failed" + ); assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed"); assertFalse(result.getCollectedfrom.isEmpty); @@ -333,7 +388,9 @@ class CrossrefMappingTest { @Test def testConvertArticleFromCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article.json")).mkString + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article.json")) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -354,32 +411,47 @@ class CrossrefMappingTest { assertNotNull(result.getDataInfo, "Datainfo test not null Failed"); assertNotNull( result.getDataInfo.getProvenanceaction, - "DataInfo/Provenance test not null Failed"); + "DataInfo/Provenance test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassid.isEmpty, - "DataInfo/Provenance/classId test not null Failed"); + "DataInfo/Provenance/classId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassname.isEmpty, - "DataInfo/Provenance/className test not null Failed"); + "DataInfo/Provenance/className test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty, - "DataInfo/Provenance/SchemeId test not null Failed"); + "DataInfo/Provenance/SchemeId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemename.isEmpty, - "DataInfo/Provenance/SchemeName test not null Failed"); + "DataInfo/Provenance/SchemeName test not null Failed" + ); assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed"); assertFalse(result.getCollectedfrom.isEmpty); val collectedFromList = result.getCollectedfrom.asScala - assert(collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion") - - assert(collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), "Wrong collected from assertion") + assert( + collectedFromList.exists(c => + c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2") + ), + "Wrong collected from assertion" + ) + assert( + collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), + "Wrong collected from assertion" + ) val relevantDates = result.getRelevantdate.asScala - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), "Missing relevant date of type created") + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), + "Missing relevant date of type created" + ) val rels = resultList.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]] assertFalse(rels.isEmpty) @@ -393,15 +465,14 @@ class CrossrefMappingTest { }) - } - - @Test def testSetDateOfAcceptanceCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/dump_file.json")).mkString + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/dump_file.json")) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -421,8 +492,13 @@ class CrossrefMappingTest { @Test def testNormalizeDOI(): Unit = { - val template = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article_funder_template.json")).mkString - val line :String = "\"funder\": [{\"name\": \"Wellcome Trust Masters Fellowship\",\"award\": [\"090633\"]}]," + val template = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article_funder_template.json") + ) + .mkString + val line: String = + "\"funder\": [{\"name\": \"Wellcome Trust Masters Fellowship\",\"award\": [\"090633\"]}]," val json = template.replace("%s", line) val resultList: List[Oaf] = Crossref2Oaf.convert(json) assertTrue(resultList.nonEmpty) @@ -431,13 +507,17 @@ class CrossrefMappingTest { result.getPid.asScala.foreach(pid => assertTrue(pid.getQualifier.getClassid.equals("doi"))) assertTrue(result.getPid.size() == 1) - result.getPid.asScala.foreach(pid => assertTrue(pid.getValue.equals("10.26850/1678-4618EQJ.v35.1.2010.p41-46".toLowerCase()))) + result.getPid.asScala.foreach(pid => + assertTrue(pid.getValue.equals("10.26850/1678-4618EQJ.v35.1.2010.p41-46".toLowerCase())) + ) } @Test def testNormalizeDOI2(): Unit = { - val template = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article.json")).mkString + val template = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article.json")) + .mkString val resultList: List[Oaf] = Crossref2Oaf.convert(template) assertTrue(resultList.nonEmpty) @@ -446,14 +526,19 @@ class CrossrefMappingTest { result.getPid.asScala.foreach(pid => assertTrue(pid.getQualifier.getClassid.equals("doi"))) assertTrue(result.getPid.size() == 1) - result.getPid.asScala.foreach(pid => assertTrue(pid.getValue.equals("10.26850/1678-4618EQJ.v35.1.2010.p41-46".toLowerCase()))) + result.getPid.asScala.foreach(pid => + assertTrue(pid.getValue.equals("10.26850/1678-4618EQJ.v35.1.2010.p41-46".toLowerCase())) + ) } @Test - def testLicenseVorClosed() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_vor.json")).mkString - + def testLicenseVorClosed(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_vor.json") + ) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -462,25 +547,30 @@ class CrossrefMappingTest { assertTrue(resultList.nonEmpty) - - val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) println(mapper.writeValueAsString(item)) - assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://www.springer.com/vor"))) - assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("CLOSED"))) + assertTrue( + item.getInstance().asScala exists (i => + i.getLicense.getValue.equals("https://www.springer.com/vor") + ) + ) + assertTrue( + item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("CLOSED")) + ) assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) - - - } @Test - def testLicenseOpen() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_open.json")).mkString - + def testLicenseOpen(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_open.json") + ) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -489,21 +579,35 @@ class CrossrefMappingTest { assertTrue(resultList.nonEmpty) + val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - - assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html"))) + assertTrue( + item.getInstance().asScala exists (i => + i.getLicense.getValue.equals( + "http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html" + ) + ) + ) assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN"))) - assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)) + assertTrue( + item.getInstance().asScala exists (i => + i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid + ) + ) mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) println(mapper.writeValueAsString(item)) } @Test - def testLicenseEmbargoOpen() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json")).mkString - + def testLicenseEmbargoOpen(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream( + "/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json" + ) + ) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -512,21 +616,35 @@ class CrossrefMappingTest { assertTrue(resultList.nonEmpty) + val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - - assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) + assertTrue( + item.getInstance().asScala exists (i => + i.getLicense.getValue.equals( + "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model" + ) + ) + ) assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN"))) - assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)) + assertTrue( + item.getInstance().asScala exists (i => + i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid + ) + ) mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) println(mapper.writeValueAsString(item)) } @Test - def testLicenseEmbargo() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_embargo.json")).mkString - + def testLicenseEmbargo(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream( + "/eu/dnetlib/doiboost/crossref/publication_license_embargo.json" + ) + ) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -535,35 +653,18 @@ class CrossrefMappingTest { assertTrue(resultList.nonEmpty) + val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - - assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) - assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))) - assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) - mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) - println(mapper.writeValueAsString(item)) - - } - - - @Test - def testLicenseEmbargoDateTime() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json")).mkString - - - assertNotNull(json) - assertFalse(json.isEmpty); - - val resultList: List[Oaf] = Crossref2Oaf.convert(json) - - assertTrue(resultList.nonEmpty) - - - val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - - assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) - assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))) + assertTrue( + item.getInstance().asScala exists (i => + i.getLicense.getValue.equals( + "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model" + ) + ) + ) + assertTrue( + item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO")) + ) assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) println(mapper.writeValueAsString(item)) @@ -571,9 +672,14 @@ class CrossrefMappingTest { } @Test - def testMultipleURLs() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/multiple_urls.json")).mkString - + def testLicenseEmbargoDateTime(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream( + "/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json" + ) + ) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -582,12 +688,47 @@ class CrossrefMappingTest { assertTrue(resultList.nonEmpty) + val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + assertTrue( + item.getInstance().asScala exists (i => + i.getLicense.getValue.equals( + "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model" + ) + ) + ) + assertTrue( + item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO")) + ) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + } + + @Test + def testMultipleURLs(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/multiple_urls.json") + ) + .mkString + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] assertEquals(1, item.getInstance().size()) assertEquals(1, item.getInstance().get(0).getUrl().size()) - assertEquals("https://doi.org/10.1016/j.jas.2019.105013", item.getInstance().get(0).getUrl().get(0)) + assertEquals( + "https://doi.org/10.1016/j.jas.2019.105013", + item.getInstance().get(0).getUrl().get(0) + ) //println(mapper.writeValueAsString(item)) } diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/mag/MAGMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/mag/MAGMappingTest.scala index 611f3b323..882c0d8a0 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/mag/MAGMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/mag/MAGMappingTest.scala @@ -12,43 +12,35 @@ import org.slf4j.{Logger, LoggerFactory} import java.sql.Timestamp import scala.io.Source - - class MAGMappingTest { val logger: Logger = LoggerFactory.getLogger(getClass) val mapper = new ObjectMapper() - - - @Test - def testSplitter():Unit = { + def testSplitter(): Unit = { val s = "sports.team" - if (s.contains(".")) { - println(s.split("\\.")head) + println(s.split("\\.") head) } } - - @Test - def testDate() :Unit = { + def testDate(): Unit = { - val p:Timestamp = Timestamp.valueOf("2011-10-02 00:00:00") + val p: Timestamp = Timestamp.valueOf("2011-10-02 00:00:00") - println(p.toString.substring(0,10)) + println(p.toString.substring(0, 10)) } - - @Test def buildInvertedIndexTest(): Unit = { - val json_input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/mag/invertedIndex.json")).mkString + val json_input = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/mag/invertedIndex.json")) + .mkString val description = ConversionUtil.convertInvertedIndexString(json_input) assertNotNull(description) assertTrue(description.nonEmpty) @@ -56,10 +48,9 @@ class MAGMappingTest { logger.debug(description) } + @Test - def normalizeDoiTest():Unit = { - - + def normalizeDoiTest(): Unit = { implicit val formats = DefaultFormats @@ -78,8 +69,9 @@ class MAGMappingTest { val schema = Encoders.product[MagPapers].schema import spark.implicits._ - val magPapers :Dataset[MagPapers] = spark.read.option("multiline",true).schema(schema).json(path).as[MagPapers] - val ret :Dataset[MagPapers] = SparkProcessMAG.getDistinctResults(magPapers) + val magPapers: Dataset[MagPapers] = + spark.read.option("multiline", true).schema(schema).json(path).as[MagPapers] + val ret: Dataset[MagPapers] = SparkProcessMAG.getDistinctResults(magPapers) assertTrue(ret.count == 10) ret.take(10).foreach(mp => assertTrue(mp.Doi.equals(mp.Doi.toLowerCase()))) @@ -87,7 +79,7 @@ class MAGMappingTest { } @Test - def normalizeDoiTest2():Unit = { + def normalizeDoiTest2(): Unit = { import org.json4s.DefaultFormats @@ -108,15 +100,13 @@ class MAGMappingTest { val schema = Encoders.product[MagPapers].schema import spark.implicits._ - val magPapers :Dataset[MagPapers] = spark.read.option("multiline",true).schema(schema).json(path).as[MagPapers] - val ret :Dataset[MagPapers] = SparkProcessMAG.getDistinctResults(magPapers) + val magPapers: Dataset[MagPapers] = + spark.read.option("multiline", true).schema(schema).json(path).as[MagPapers] + val ret: Dataset[MagPapers] = SparkProcessMAG.getDistinctResults(magPapers) assertTrue(ret.count == 8) ret.take(8).foreach(mp => assertTrue(mp.Doi.equals(mp.Doi.toLowerCase()))) spark.close() //ret.take(8).foreach(mp => println(write(mp))) } - } - - diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala index 7c8f01f81..e5bf1bd5f 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -19,8 +19,10 @@ class MappingORCIDToOAFTest { val mapper = new ObjectMapper() @Test - def testExtractData():Unit ={ - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/orcid/dataOutput")).mkString + def testExtractData(): Unit = { + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/orcid/dataOutput")) + .mkString assertNotNull(json) assertFalse(json.isEmpty) json.lines.foreach(s => { @@ -29,10 +31,10 @@ class MappingORCIDToOAFTest { } @Test - def testOAFConvert(@TempDir testDir: Path):Unit ={ - val sourcePath:String = getClass.getResource("/eu/dnetlib/doiboost/orcid/datasets").getPath - val targetPath: String =s"${testDir.toString}/output/orcidPublication" - val workingPath =s"${testDir.toString}/wp/" + def testOAFConvert(@TempDir testDir: Path): Unit = { + val sourcePath: String = getClass.getResource("/eu/dnetlib/doiboost/orcid/datasets").getPath + val targetPath: String = s"${testDir.toString}/output/orcidPublication" + val workingPath = s"${testDir.toString}/wp/" val conf = new SparkConf() conf.setMaster("local[*]") @@ -46,18 +48,14 @@ class MappingORCIDToOAFTest { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] import spark.implicits._ - SparkPreprocessORCID.run( spark,sourcePath, workingPath) + SparkPreprocessORCID.run(spark, sourcePath, workingPath) - SparkConvertORCIDToOAF.run(spark, workingPath,targetPath) + SparkConvertORCIDToOAF.run(spark, workingPath, targetPath) val mapper = new ObjectMapper() - - val oA = spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem].count() - - val p: Dataset[Publication] = spark.read.load(targetPath).as[Publication] assertTrue(oA == p.count()) @@ -65,19 +63,18 @@ class MappingORCIDToOAFTest { spark.close() - } - @Test - def testExtractDat1():Unit ={ + def testExtractDat1(): Unit = { + val aList: List[OrcidAuthor] = List( + OrcidAuthor("0000-0002-4335-5309", Some("Lucrecia"), Some("Curto"), null, null, null), + OrcidAuthor("0000-0001-7501-3330", Some("Emilio"), Some("Malchiodi"), null, null, null), + OrcidAuthor("0000-0002-5490-9186", Some("Sofia"), Some("Noli Truant"), null, null, null) + ) - - val aList: List[OrcidAuthor] = List(OrcidAuthor("0000-0002-4335-5309", Some("Lucrecia"), Some("Curto"), null, null, null ), - OrcidAuthor("0000-0001-7501-3330", Some("Emilio"), Some("Malchiodi"), null, null, null ), OrcidAuthor("0000-0002-5490-9186", Some("Sofia"), Some("Noli Truant"), null, null, null )) - - val orcid:ORCIDItem = ORCIDItem("10.1042/BCJ20160876", aList) + val orcid: ORCIDItem = ORCIDItem("10.1042/BCJ20160876", aList) val oaf = ORCIDToOAF.convertTOOAF(orcid) assert(oaf.getPid.size() == 1) @@ -85,10 +82,6 @@ class MappingORCIDToOAFTest { oaf.getPid.toList.foreach(pid => assert(pid.getValue.equals("10.1042/BCJ20160876"))) //println(mapper.writeValueAsString(ORCIDToOAF.convertTOOAF(orcid))) - } - - - } diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala index 6671758b2..542faa8ad 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala @@ -14,41 +14,43 @@ class UnpayWallMappingTest { val logger: Logger = LoggerFactory.getLogger(getClass) val mapper = new ObjectMapper() - @Test - def testMappingToOAF():Unit ={ + def testMappingToOAF(): Unit = { - val Ilist = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/uw/input.json")).mkString + val Ilist = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/uw/input.json")) + .mkString - var i:Int = 0 - for (line <-Ilist.lines) { + var i: Int = 0 + for (line <- Ilist.lines) { val p = UnpayWallToOAF.convertToOAF(line) - if(p!= null) { - assertTrue(p.getInstance().size()==1) - if (i== 0){ + if (p != null) { + assertTrue(p.getInstance().size() == 1) + if (i == 0) { assertTrue(p.getPid.get(0).getValue.equals("10.1038/2211089b0")) } - if (i== 1){ + if (i == 1) { assertTrue(p.getPid.get(0).getValue.equals("10.1021/acs.bioconjchem.8b00058.s001")) } - if (i== 2){ + if (i == 2) { assertTrue(p.getPid.get(0).getValue.equals("10.1021/acs.bioconjchem.8b00086.s001")) } logger.info(s"ID : ${p.getId}") } assertNotNull(line) assertTrue(line.nonEmpty) - i = i+1 + i = i + 1 } - - - val l = Ilist.lines.next() + val l = Ilist.lines.next() val item = UnpayWallToOAF.convertToOAF(l) - assertEquals(item.getInstance().get(0).getAccessright.getOpenAccessRoute, OpenAccessRoute.bronze) + assertEquals( + item.getInstance().get(0).getAccessright.getOpenAccessRoute, + OpenAccessRoute.bronze + ) logger.info(mapper.writeValueAsString(item)) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index ad4e1c96e..c5a2b4024 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -4,137 +4,190 @@ import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} +case class HostedByItemType( + id: String, + officialname: String, + issn: String, + eissn: String, + lissn: String, + openAccess: Boolean +) {} -case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} -case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {} +case class HostedByInfo( + id: String, + officialname: String, + journal_id: String, + provenance: String, + id_type: String +) {} object Aggregators { - - - def getId(s1:String, s2:String) : String = { - if (s1.startsWith("10|")){ - return s1} - s2 - } - - def getValue(s1:String, s2:String) : String = { - if(!s1.equals("")){ + def getId(s1: String, s2: String): String = { + if (s1.startsWith("10|")) { return s1 } s2 } + def getValue(s1: String, s2: String): String = { + if (!s1.equals("")) { + return s1 + } + s2 + } - def explodeHostedByItemType(df: Dataset[(String, HostedByItemType)]): Dataset[(String, HostedByItemType)] = { - val transformedData : Dataset[(String, HostedByItemType)] = df + def explodeHostedByItemType( + df: Dataset[(String, HostedByItemType)] + ): Dataset[(String, HostedByItemType)] = { + val transformedData: Dataset[(String, HostedByItemType)] = df .groupByKey(_._1)(Encoders.STRING) .agg(Aggregators.hostedByAggregator) - .map{ - case (id:String , res:(String, HostedByItemType)) => res + .map { case (id: String, res: (String, HostedByItemType)) => + res }(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) transformedData } - val hostedByAggregator: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = new Aggregator[(String, HostedByItemType), (String, HostedByItemType), (String, HostedByItemType)] { - override def zero: (String, HostedByItemType) = ("", HostedByItemType("","","","","",false)) - override def reduce(b: (String, HostedByItemType), a:(String,HostedByItemType)): (String, HostedByItemType) = { - return merge(b, a) - } - override def merge(b1: (String, HostedByItemType), b2: (String, HostedByItemType)): (String, HostedByItemType) = { - if (b1 == null){ - return b2 + val hostedByAggregator: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = + new Aggregator[ + (String, HostedByItemType), + (String, HostedByItemType), + (String, HostedByItemType) + ] { + + override def zero: (String, HostedByItemType) = + ("", HostedByItemType("", "", "", "", "", false)) + + override def reduce( + b: (String, HostedByItemType), + a: (String, HostedByItemType) + ): (String, HostedByItemType) = { + return merge(b, a) } - if(b2 == null){ - return b1 - } - if(b1._2.id.startsWith("10|")){ - return (b1._1, HostedByItemType(b1._2.id, b1._2.officialname, b1._2.issn, b1._2.eissn, b1._2.lissn, b1._2.openAccess || b2._2.openAccess)) + + override def merge( + b1: (String, HostedByItemType), + b2: (String, HostedByItemType) + ): (String, HostedByItemType) = { + if (b1 == null) { + return b2 + } + if (b2 == null) { + return b1 + } + if (b1._2.id.startsWith("10|")) { + return ( + b1._1, + HostedByItemType( + b1._2.id, + b1._2.officialname, + b1._2.issn, + b1._2.eissn, + b1._2.lissn, + b1._2.openAccess || b2._2.openAccess + ) + ) + + } + return ( + b2._1, + HostedByItemType( + b2._2.id, + b2._2.officialname, + b2._2.issn, + b2._2.eissn, + b2._2.lissn, + b1._2.openAccess || b2._2.openAccess + ) + ) } - return (b2._1, HostedByItemType(b2._2.id, b2._2.officialname, b2._2.issn, b2._2.eissn, b2._2.lissn, b1._2.openAccess || b2._2.openAccess)) - } - override def finish(reduction: (String,HostedByItemType)): (String, HostedByItemType) = reduction - override def bufferEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) + override def finish(reduction: (String, HostedByItemType)): (String, HostedByItemType) = + reduction - override def outputEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) - }.toColumn + override def bufferEncoder: Encoder[(String, HostedByItemType)] = + Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) + override def outputEncoder: Encoder[(String, HostedByItemType)] = + Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) + }.toColumn + def resultToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = + new Aggregator[EntityInfo, EntityInfo, EntityInfo] { + override def zero: EntityInfo = EntityInfo.newInstance("", "", "") - - def resultToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ - override def zero: EntityInfo = EntityInfo.newInstance("","","") - - override def reduce(b: EntityInfo, a:EntityInfo): EntityInfo = { - return merge(b, a) - } - override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { - if (b1 == null){ - return b2 + override def reduce(b: EntityInfo, a: EntityInfo): EntityInfo = { + return merge(b, a) } - if(b2 == null){ - return b1 + + override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { + if (b1 == null) { + return b2 + } + if (b2 == null) { + return b1 + } + if (!b1.getHostedById.equals("")) { + b1.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) + return b1 + } + b2.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) + b2 + } - if(!b1.getHostedById.equals("")){ - b1.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) - return b1 - } - b2.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) - b2 + override def finish(reduction: EntityInfo): EntityInfo = reduction + override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - } - override def finish(reduction: EntityInfo): EntityInfo = reduction - override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + }.toColumn - override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - }.toColumn - - def resultToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { - val transformedData : Dataset[EntityInfo] = df + def resultToSingleId(df: Dataset[EntityInfo]): Dataset[EntityInfo] = { + val transformedData: Dataset[EntityInfo] = df .groupByKey(_.getId)(Encoders.STRING) .agg(Aggregators.resultToSingleIdAggregator) - .map{ - case (id:String , res: EntityInfo) => res + .map { case (id: String, res: EntityInfo) => + res }(Encoders.bean(classOf[EntityInfo])) transformedData } - def datasourceToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ - override def zero: EntityInfo = EntityInfo.newInstance("","","") + def datasourceToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = + new Aggregator[EntityInfo, EntityInfo, EntityInfo] { + override def zero: EntityInfo = EntityInfo.newInstance("", "", "") - override def reduce(b: EntityInfo, a:EntityInfo): EntityInfo = { - return merge(b, a) - } - override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { - if (b1 == null){ - return b2 + override def reduce(b: EntityInfo, a: EntityInfo): EntityInfo = { + return merge(b, a) } - if(b2 == null){ - return b1 + + override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { + if (b1 == null) { + return b2 + } + if (b2 == null) { + return b1 + } + if (!b1.getHostedById.equals("")) { + return b1 + } + b2 + } - if(!b1.getHostedById.equals("")){ - return b1 - } - b2 + override def finish(reduction: EntityInfo): EntityInfo = reduction + override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - } - override def finish(reduction: EntityInfo): EntityInfo = reduction - override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + }.toColumn - override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - }.toColumn - - - def datasourceToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { - val transformedData : Dataset[EntityInfo] = df + def datasourceToSingleId(df: Dataset[EntityInfo]): Dataset[EntityInfo] = { + val transformedData: Dataset[EntityInfo] = df .groupByKey(_.getHostedById)(Encoders.STRING) .agg(Aggregators.datasourceToSingleIdAggregator) - .map{ - case (id:String , res: EntityInfo) => res + .map { case (id: String, res: EntityInfo) => + res }(Encoders.bean(classOf[EntityInfo])) transformedData diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index 38af3eee4..80c672929 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -14,7 +14,8 @@ import org.slf4j.{Logger, LoggerFactory} object SparkApplyHostedByMapToDatasource { def applyHBtoDats(join: Dataset[EntityInfo], dats: Dataset[Datasource]): Dataset[Datasource] = { - dats.joinWith(join, dats.col("id").equalTo(join.col("hostedById")), "left") + dats + .joinWith(join, dats.col("id").equalTo(join.col("hostedById")), "left") .map(t2 => { val d: Datasource = t2._1 if (t2._2 != null) { @@ -31,14 +32,21 @@ object SparkApplyHostedByMapToDatasource { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val graphPath = parser.get("graphPath") val outputPath = parser.get("outputPath") @@ -51,20 +59,27 @@ object SparkApplyHostedByMapToDatasource { val mapper = new ObjectMapper() - val dats: Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") + val dats: Dataset[Datasource] = spark.read + .textFile(graphPath + "/datasource") .map(r => mapper.readValue(r, classOf[Datasource])) - val pinfo: Dataset[EntityInfo] = Aggregators.datasourceToSingleId(spark.read.textFile(preparedInfoPath) - .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) + val pinfo: Dataset[EntityInfo] = Aggregators.datasourceToSingleId( + spark.read + .textFile(preparedInfoPath) + .map(ei => mapper.readValue(ei, classOf[EntityInfo])) + ) - applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) + applyHBtoDats(pinfo, dats).write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath) - spark.read.textFile(outputPath) + spark.read + .textFile(outputPath) .write .mode(SaveMode.Overwrite) .option("compression", "gzip") .text(graphPath + "/datasource") } - } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 204325982..a900fc241 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -16,7 +16,8 @@ import scala.collection.JavaConverters._ object SparkApplyHostedByMapToResult { def applyHBtoPubs(join: Dataset[EntityInfo], pubs: Dataset[Publication]) = { - pubs.joinWith(join, pubs.col("id").equalTo(join.col("id")), "left") + pubs + .joinWith(join, pubs.col("id").equalTo(join.col("id")), "left") .map(t2 => { val p: Publication = t2._1 if (t2._2 != null) { @@ -27,7 +28,14 @@ object SparkApplyHostedByMapToResult { inst.getHostedby.setKey(ei.getHostedById) inst.getHostedby.setValue(ei.getName) if (ei.getOpenAccess) { - inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) + inst.setAccessright( + OafMapperUtils.accessRight( + ModelConstants.ACCESS_RIGHT_OPEN, + "Open Access", + ModelConstants.DNET_ACCESS_MODES, + ModelConstants.DNET_ACCESS_MODES + ) + ) inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) p.setBestaccessright(OafMapperUtils.createBestAccessRights(p.getInstance())); } @@ -40,46 +48,54 @@ object SparkApplyHostedByMapToResult { def main(args: Array[String]): Unit = { - val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() val graphPath = parser.get("graphPath") val outputPath = parser.get("outputPath") val preparedInfoPath = parser.get("preparedInfoPath") - implicit val formats = DefaultFormats - implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) val mapper = new ObjectMapper() - val pubs: Dataset[Publication] = spark.read.textFile(graphPath + "/publication") + val pubs: Dataset[Publication] = spark.read + .textFile(graphPath + "/publication") .map(r => mapper.readValue(r, classOf[Publication])) - val pinfo: Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) + val pinfo: Dataset[EntityInfo] = spark.read + .textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo])) - applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) + applyHBtoPubs(pinfo, pubs).write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath) - spark.read.textFile(outputPath) + spark.read + .textFile(outputPath) .write .mode(SaveMode.Overwrite) .option("compression", "gzip") .text(graphPath + "/publication") } - } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index 87e203e4b..34798b147 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -19,7 +19,6 @@ object SparkPrepareHostedByInfoToApply { def getList(id: String, j: Journal, name: String): List[EntityInfo] = { var lst: List[EntityInfo] = List() - if (j.getIssnLinking != null && !j.getIssnLinking.equals("")) { lst = EntityInfo.newInstance(id, j.getIssnLinking, name) :: lst } @@ -37,14 +36,14 @@ object SparkPrepareHostedByInfoToApply { val mapper = new ObjectMapper() - val dd: Dataset[Publication] = spark.read.textFile(publicationPath) + val dd: Dataset[Publication] = spark.read + .textFile(publicationPath) .map(r => mapper.readValue(r, classOf[Publication])) dd.filter(p => p.getJournal != null).flatMap(p => getList(p.getId, p.getJournal, "")) } - def toEntityInfo(input: String): EntityInfo = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -53,7 +52,6 @@ object SparkPrepareHostedByInfoToApply { toEntityItem(c.keys.head, c.values.head) } - def toEntityItem(journal_id: String, hbi: HostedByItemType): EntityInfo = { EntityInfo.newInstance(hbi.id, journal_id, hbi.officialname, hbi.openAccess) @@ -61,62 +59,69 @@ object SparkPrepareHostedByInfoToApply { } def joinResHBM(res: Dataset[EntityInfo], hbm: Dataset[EntityInfo]): Dataset[EntityInfo] = { - Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journalId").equalTo(hbm.col("journalId")), "left") - .map(t2 => { - val res: EntityInfo = t2._1 - if (t2._2 != null) { - val ds = t2._2 - res.setHostedById(ds.getId) - res.setOpenAccess(ds.getOpenAccess) - res.setName(ds.getName) - } - res - })) + Aggregators.resultToSingleId( + res + .joinWith(hbm, res.col("journalId").equalTo(hbm.col("journalId")), "left") + .map(t2 => { + val res: EntityInfo = t2._1 + if (t2._2 != null) { + val ds = t2._2 + res.setHostedById(ds.getId) + res.setOpenAccess(ds.getOpenAccess) + res.setName(ds.getName) + } + res + }) + ) } def main(args: Array[String]): Unit = { - val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() val graphPath = parser.get("graphPath") val outputPath = parser.get("preparedInfoPath") val hostedByMapPath = parser.get("hostedByMapPath") - implicit val formats = DefaultFormats - logger.info("Getting the Datasources") import spark.implicits._ - //STEP1: read the hostedbymap and transform it in EntityInfo - val hostedByInfo: Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) + val hostedByInfo: Dataset[EntityInfo] = + spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) - val resultInfoDataset: Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") + val resultInfoDataset: Dataset[EntityInfo] = + prepareResultInfo(spark, graphPath + "/publication") //STEP3: left join resultInfo with hostedByInfo on journal_id. Reduction of all the results with the same id in just //one entry (one result could be associated to issn and eissn and so possivly matching more than once against the map) //to this entry we add the id of the datasource for the next step - joinResHBM(resultInfoDataset, hostedByInfo) - .write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) - + joinResHBM(resultInfoDataset, hostedByInfo).write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath) } - } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index 6dfe35623..8d8965866 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -17,9 +17,8 @@ import java.io.PrintWriter object SparkProduceHostedByMap { - - implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) - + implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = + Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)): HostedByItemType = { val openaire: HostedByInfo = input._1._1 @@ -28,9 +27,33 @@ object SparkProduceHostedByMap { val isOpenAccess: Boolean = doaj == null && gold == null openaire.journal_id match { - case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) - case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) - case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) + case Constants.ISSN => + HostedByItemType( + openaire.id, + openaire.officialname, + openaire.journal_id, + "", + "", + isOpenAccess + ) + case Constants.EISSN => + HostedByItemType( + openaire.id, + openaire.officialname, + "", + openaire.journal_id, + "", + isOpenAccess + ) + case Constants.ISSNL => + HostedByItemType( + openaire.id, + openaire.officialname, + "", + "", + openaire.journal_id, + isOpenAccess + ) // catch the default with a variable so you can print it case whoa => null @@ -46,11 +69,16 @@ object SparkProduceHostedByMap { Serialization.write(map) - } - - def getHostedByItemType(id: String, officialname: String, issn: String, eissn: String, issnl: String, oa: Boolean): HostedByItemType = { + def getHostedByItemType( + id: String, + officialname: String, + issn: String, + eissn: String, + issnl: String, + oa: Boolean + ): HostedByItemType = { if (issn != null) { if (eissn != null) { if (issnl != null) { @@ -85,7 +113,14 @@ object SparkProduceHostedByMap { def oaToHostedbyItemType(dats: Datasource): HostedByItemType = { if (dats.getJournal != null) { - return getHostedByItemType(dats.getId, dats.getOfficialname.getValue, dats.getJournal.getIssnPrinted, dats.getJournal.getIssnOnline, dats.getJournal.getIssnLinking, false) + return getHostedByItemType( + dats.getId, + dats.getOfficialname.getValue, + dats.getJournal.getIssnPrinted, + dats.getJournal.getIssnOnline, + dats.getJournal.getIssnLinking, + false + ) } HostedByItemType("", "", "", "", "", false) } @@ -94,32 +129,41 @@ object SparkProduceHostedByMap { import spark.implicits._ - val mapper = new ObjectMapper() implicit var encoderD = Encoders.kryo[Datasource] - val dd: Dataset[Datasource] = spark.read.textFile(datasourcePath) + val dd: Dataset[Datasource] = spark.read + .textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[Datasource])) dd.map { ddt => oaToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) } - def goldToHostedbyItemType(gold: UnibiGoldModel): HostedByItemType = { - return getHostedByItemType(Constants.UNIBI, gold.getTitle, gold.getIssn, "", gold.getIssnL, true) + return getHostedByItemType( + Constants.UNIBI, + gold.getTitle, + gold.getIssn, + "", + gold.getIssnL, + true + ) } - - def goldHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { + def goldHostedByDataset( + spark: SparkSession, + datasourcePath: String + ): Dataset[HostedByItemType] = { import spark.implicits._ implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] val mapper = new ObjectMapper() - val dd: Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath) + val dd: Dataset[UnibiGoldModel] = spark.read + .textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[UnibiGoldModel])) dd.map { ddt => goldToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) @@ -128,17 +172,28 @@ object SparkProduceHostedByMap { def doajToHostedbyItemType(doaj: DOAJModel): HostedByItemType = { - return getHostedByItemType(Constants.DOAJ, doaj.getJournalTitle, doaj.getIssn, doaj.getEissn, "", true) + return getHostedByItemType( + Constants.DOAJ, + doaj.getJournalTitle, + doaj.getIssn, + doaj.getEissn, + "", + true + ) } - def doajHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { + def doajHostedByDataset( + spark: SparkSession, + datasourcePath: String + ): Dataset[HostedByItemType] = { import spark.implicits._ implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] val mapper = new ObjectMapper() - val dd: Dataset[DOAJModel] = spark.read.textFile(datasourcePath) + val dd: Dataset[DOAJModel] = spark.read + .textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[DOAJModel])) dd.map { ddt => doajToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) @@ -159,7 +214,6 @@ object SparkProduceHostedByMap { lst } - def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode: String): Unit = { val conf = new Configuration() @@ -169,49 +223,51 @@ object SparkProduceHostedByMap { val writer = new PrintWriter(output) try { input.foreach(hbi => writer.println(hbi)) - } - finally { + } finally { writer.close() } } - def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() val datasourcePath = parser.get("datasourcePath") val workingDirPath = parser.get("workingPath") val outputPath = parser.get("outputPath") - implicit val formats = DefaultFormats - logger.info("Getting the Datasources") - - Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath) - .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold.json")) - .union(doajHostedByDataset(spark, workingDirPath + "/doaj.json")) - .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) + Aggregators + .explodeHostedByItemType( + oaHostedByDataset(spark, datasourcePath) + .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold.json")) + .union(doajHostedByDataset(spark, workingDirPath + "/doaj.json")) + .flatMap(hbi => toList(hbi)) + ) + .filter(hbi => hbi._2.id.startsWith("10|")) .map(hbi => toHostedByMap(hbi))(Encoders.STRING) - .rdd.saveAsTextFile(outputPath, classOf[GzipCodec]) - + .rdd + .saveAsTextFile(outputPath, classOf[GzipCodec]) } - } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala index fa13f477c..533948289 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala @@ -20,7 +20,13 @@ object CopyHdfsOafSparkApplication { def main(args: Array[String]): Unit = { val log = LoggerFactory.getLogger(getClass) val conf = new SparkConf() - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json")).mkString) + val parser = new ArgumentApplicationParser( + Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json") + ) + .mkString + ) parser.parseArgument(args) val spark = @@ -28,7 +34,8 @@ object CopyHdfsOafSparkApplication { .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val sc: SparkContext = spark.sparkContext @@ -49,19 +56,22 @@ object CopyHdfsOafSparkApplication { implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - val paths = DHPUtils.mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation, true).asScala + val paths = + DHPUtils.mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation, true).asScala - val validPaths: List[String] = paths.filter(p => HdfsSupport.exists(p, sc.hadoopConfiguration)).toList + val validPaths: List[String] = + paths.filter(p => HdfsSupport.exists(p, sc.hadoopConfiguration)).toList - val types = ModelSupport.oafTypes.entrySet - .asScala + val types = ModelSupport.oafTypes.entrySet.asScala .map(e => Tuple2(e.getKey, e.getValue)) if (validPaths.nonEmpty) { val oaf = spark.read.textFile(validPaths: _*) - val mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + val mapper = + new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) - types.foreach(t => oaf + types.foreach(t => + oaf .filter(o => isOafType(o, t._1)) .map(j => mapper.readValue(j, t._2).asInstanceOf[Oaf]) .map(s => mapper.writeValueAsString(s))(Encoders.STRING) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala index 8e15063c2..f5a13e72b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.common.EntityType -import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset,_} +import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _} import org.apache.commons.io.IOUtils import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.SparkConf @@ -13,20 +13,32 @@ import org.slf4j.{Logger, LoggerFactory} object SparkResolveEntities { val mapper = new ObjectMapper() - val entities = List(EntityType.dataset, EntityType.publication, EntityType.software, EntityType.otherresearchproduct) + + val entities = List( + EntityType.dataset, + EntityType.publication, + EntityType.software, + EntityType.otherresearchproduct + ) def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() val graphBasePath = parser.get("graphBasePath") log.info(s"graphBasePath -> $graphBasePath") @@ -38,7 +50,6 @@ object SparkResolveEntities { val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) fs.mkdirs(new Path(workingPath)) @@ -46,60 +57,84 @@ object SparkResolveEntities { generateResolvedEntities(spark, workingPath, graphBasePath, targetPath) } - def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ - val rPid: Dataset[(String, String)] = spark.read.load(s"$workingPath/relationResolvedPid").as[(String, String)] - val up: Dataset[(String, Result)] = spark.read.text(unresolvedPath).as[String].map(s => mapper.readValue(s, classOf[Result])).map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) + val rPid: Dataset[(String, String)] = + spark.read.load(s"$workingPath/relationResolvedPid").as[(String, String)] + val up: Dataset[(String, Result)] = spark.read + .text(unresolvedPath) + .as[String] + .map(s => mapper.readValue(s, classOf[Result])) + .map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) - rPid.joinWith(up, rPid("_2").equalTo(up("_1")), "inner").map { - r => + rPid + .joinWith(up, rPid("_2").equalTo(up("_1")), "inner") + .map { r => val result = r._2._2 val dnetId = r._1._1 result.setId(dnetId) result - }.write.mode(SaveMode.Overwrite).save(s"$workingPath/resolvedEntities") + } + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/resolvedEntities") } - def deserializeObject(input: String, entity: EntityType): Result = { entity match { - case EntityType.publication => mapper.readValue(input, classOf[Publication]) - case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) - case EntityType.software => mapper.readValue(input, classOf[Software]) + case EntityType.publication => mapper.readValue(input, classOf[Publication]) + case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) + case EntityType.software => mapper.readValue(input, classOf[Software]) case EntityType.otherresearchproduct => mapper.readValue(input, classOf[OtherResearchProduct]) } } - def generateResolvedEntities(spark: SparkSession, workingPath: String, graphBasePath: String, targetPath: String) = { + def generateResolvedEntities( + spark: SparkSession, + workingPath: String, + graphBasePath: String, + targetPath: String + ) = { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ - val re: Dataset[(String, Result)] = spark.read.load(s"$workingPath/resolvedEntities").as[Result].map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) - entities.foreach { - e => { + val re: Dataset[(String, Result)] = spark.read + .load(s"$workingPath/resolvedEntities") + .as[Result] + .map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) + entities.foreach { e => + { - val currentEntityDataset: Dataset[(String, Result)] = spark.read.text(s"$graphBasePath/$e").as[String].map(s => deserializeObject(s, e)).map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) + val currentEntityDataset: Dataset[(String, Result)] = spark.read + .text(s"$graphBasePath/$e") + .as[String] + .map(s => deserializeObject(s, e)) + .map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) - currentEntityDataset.joinWith(re, currentEntityDataset("_1").equalTo(re("_1")), "left").map(k => { + currentEntityDataset + .joinWith(re, currentEntityDataset("_1").equalTo(re("_1")), "left") + .map(k => { - val a = k._1 - val b = k._2 - if (b == null) - a._2 - else { - a._2.mergeFrom(b._2) - a._2 - } - }).map(r => mapper.writeValueAsString(r))(Encoders.STRING) - .write.mode(SaveMode.Overwrite).option("compression", "gzip").text(s"$targetPath/$e") + val a = k._1 + val b = k._2 + if (b == null) + a._2 + else { + a._2.mergeFrom(b._2) + a._2 + } + }) + .map(r => mapper.writeValueAsString(r))(Encoders.STRING) + .write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(s"$targetPath/$e") } - } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index 80c09940f..2567a30a6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -17,18 +17,25 @@ import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} object SparkResolveRelation { + def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() val graphBasePath = parser.get("graphBasePath") log.info(s"graphBasePath -> $graphBasePath") @@ -41,7 +48,6 @@ object SparkResolveRelation { implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) import spark.implicits._ - //CLEANING TEMPORARY FOLDER HdfsSupport.remove(workingPath, spark.sparkContext.hadoopConfiguration) val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) @@ -51,39 +57,49 @@ object SparkResolveRelation { val mapper: ObjectMapper = new ObjectMapper() - val rPid: Dataset[(String, String)] = spark.read.load(s"$workingPath/relationResolvedPid").as[(String, String)] + val rPid: Dataset[(String, String)] = + spark.read.load(s"$workingPath/relationResolvedPid").as[(String, String)] - val relationDs: Dataset[(String, Relation)] = spark.read.text(s"$graphBasePath/relation").as[String] - .map(s => mapper.readValue(s, classOf[Relation])).as[Relation] + val relationDs: Dataset[(String, Relation)] = spark.read + .text(s"$graphBasePath/relation") + .as[String] + .map(s => mapper.readValue(s, classOf[Relation])) + .as[Relation] .map(r => (r.getSource.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) - relationDs.joinWith(rPid, relationDs("_1").equalTo(rPid("_2")), "left").map { - m => + relationDs + .joinWith(rPid, relationDs("_1").equalTo(rPid("_2")), "left") + .map { m => val sourceResolved = m._2 val currentRelation = m._1._2 if (sourceResolved != null && sourceResolved._1 != null && sourceResolved._1.nonEmpty) currentRelation.setSource(sourceResolved._1) currentRelation - }.write + } + .write .mode(SaveMode.Overwrite) .save(s"$workingPath/relationResolvedSource") - - val relationSourceResolved: Dataset[(String, Relation)] = spark.read.load(s"$workingPath/relationResolvedSource").as[Relation] + val relationSourceResolved: Dataset[(String, Relation)] = spark.read + .load(s"$workingPath/relationResolvedSource") + .as[Relation] .map(r => (r.getTarget.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) - relationSourceResolved.joinWith(rPid, relationSourceResolved("_1").equalTo(rPid("_2")), "left").map { - m => + relationSourceResolved + .joinWith(rPid, relationSourceResolved("_1").equalTo(rPid("_2")), "left") + .map { m => val targetResolved = m._2 val currentRelation = m._1._2 if (targetResolved != null && targetResolved._1.nonEmpty) currentRelation.setTarget(targetResolved._1) currentRelation - } + } .write .mode(SaveMode.Overwrite) .save(s"$workingPath/relation_resolved") - spark.read.load(s"$workingPath/relation_resolved").as[Relation] + spark.read + .load(s"$workingPath/relation_resolved") + .as[Relation] .filter(r => !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")) .map(r => mapper.writeValueAsString(r)) .write @@ -96,33 +112,31 @@ object SparkResolveRelation { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) val result: List[(String, String)] = for { - JObject(iObj) <- json \ "instance" - JField("collectedfrom", JObject(cf)) <- iObj + JObject(iObj) <- json \ "instance" + JField("collectedfrom", JObject(cf)) <- iObj JField("instancetype", JObject(instancetype)) <- iObj - JField("value", JString(collectedFrom)) <- cf - JField("classname", JString(classname)) <- instancetype + JField("value", JString(collectedFrom)) <- cf + JField("classname", JString(classname)) <- instancetype } yield (classname, collectedFrom) result } - def extractPidsFromRecord(input: String): (String, List[(String, String)]) = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) val id: String = (json \ "id").extract[String] val result: List[(String, String)] = for { - JObject(pids) <- json \\ "instance" \ "pid" - JField("value", JString(pidValue)) <- pids + JObject(pids) <- json \\ "instance" \ "pid" + JField("value", JString(pidValue)) <- pids JField("qualifier", JObject(qualifier)) <- pids - JField("classid", JString(pidType)) <- qualifier + JField("classid", JString(pidType)) <- qualifier } yield (pidValue, pidType) (id, result) } - private def isRelation(input: String): Boolean = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -132,20 +146,25 @@ object SparkResolveRelation { source != null } - def extractPidResolvedTableFromJsonRDD(spark: SparkSession, graphPath: String, workingPath: String) = { + def extractPidResolvedTableFromJsonRDD( + spark: SparkSession, + graphPath: String, + workingPath: String + ) = { import spark.implicits._ - val d: RDD[(String, String)] = spark.sparkContext.textFile(s"$graphPath/*") + val d: RDD[(String, String)] = spark.sparkContext + .textFile(s"$graphPath/*") .filter(i => !isRelation(i)) .map(i => extractPidsFromRecord(i)) .filter(s => s != null && s._1 != null && s._2 != null && s._2.nonEmpty) .flatMap { p => - p._2.map(pid => - (p._1, DHPUtils.generateUnresolvedIdentifier(pid._1, pid._2)) - ) - }.filter(r => r._1 != null || r._2 != null) + p._2.map(pid => (p._1, DHPUtils.generateUnresolvedIdentifier(pid._1, pid._2))) + } + .filter(r => r._1 != null || r._2 != null) - spark.createDataset(d) + spark + .createDataset(d) .groupByKey(_._2) .reduceGroups((x, y) => if (x._1.startsWith("50|doi") || x._1.startsWith("50|pmid")) x else y) .map(s => s._2) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala index 9df3b41bd..79b1c22cd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala @@ -7,24 +7,26 @@ import org.apache.spark.sql.SparkSession object SparkDataciteToOAF { - def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/ebi/datacite_to_df_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/ebi/datacite_to_df_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() val sc = spark.sparkContext val inputPath = parser.get("inputPath") - } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala index 9d16cf907..fb90531c5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala @@ -11,18 +11,22 @@ import org.slf4j.{Logger, LoggerFactory} object SparkConvertDatasetToJsonRDD { - def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/convert_dataset_json_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/convert_dataset_json_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") @@ -33,9 +37,13 @@ object SparkConvertDatasetToJsonRDD { val mapper = new ObjectMapper() implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) - resultObject.foreach { item => - spark.read.load(s"$sourcePath/$item").as[Result].map(r => mapper.writeValueAsString(r))(Encoders.STRING).rdd.saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) + spark.read + .load(s"$sourcePath/$item") + .as[Result] + .map(r => mapper.writeValueAsString(r))(Encoders.STRING) + .rdd + .saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala index 0c54de7c8..bfa07eb69 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala @@ -15,14 +15,19 @@ object SparkConvertObjectToJson { def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/convert_object_json_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/convert_object_json_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") @@ -33,24 +38,28 @@ object SparkConvertObjectToJson { val scholixUpdatePath = parser.get("scholixUpdatePath") log.info(s"scholixUpdatePath -> $scholixUpdatePath") - - implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] - val mapper = new ObjectMapper objectType.toLowerCase match { case "scholix" => log.info("Serialize Scholix") val d: Dataset[Scholix] = spark.read.load(sourcePath).as[Scholix] - val u :Dataset[Scholix]= spark.read.load(s"$scholixUpdatePath/scholix").as[Scholix] - d.union(u).repartition(8000).map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.saveAsTextFile(targetPath, classOf[GzipCodec]) + val u: Dataset[Scholix] = spark.read.load(s"$scholixUpdatePath/scholix").as[Scholix] + d.union(u) + .repartition(8000) + .map(s => mapper.writeValueAsString(s))(Encoders.STRING) + .rdd + .saveAsTextFile(targetPath, classOf[GzipCodec]) case "summary" => log.info("Serialize Summary") val d: Dataset[ScholixSummary] = spark.read.load(sourcePath).as[ScholixSummary] - d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(1000).saveAsTextFile(targetPath, classOf[GzipCodec]) + d.map(s => mapper.writeValueAsString(s))(Encoders.STRING) + .rdd + .repartition(1000) + .saveAsTextFile(targetPath, classOf[GzipCodec]) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 2115df1fd..1bfb9a9ac 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -2,26 +2,38 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{ + OtherResearchProduct, + Publication, + Relation, + Result, + Software, + Dataset => OafDataset +} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} + object SparkConvertRDDtoDataset { def main(args: Array[String]): Unit = { - val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/convert_dataset_json_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/convert_dataset_json_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") @@ -31,43 +43,79 @@ object SparkConvertRDDtoDataset { val entityPath = s"$t/entities" val relPath = s"$t/relation" val mapper = new ObjectMapper() - implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) - implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) - implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) - implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) - implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) - + implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) + implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) + implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) + implicit val orpEncoder: Encoder[OtherResearchProduct] = + Encoders.kryo(classOf[OtherResearchProduct]) + implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) log.info("Converting dataset") - val rddDataset =spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])).filter(r=> r.getDataInfo!= null && r.getDataInfo.getDeletedbyinference == false) - spark.createDataset(rddDataset).as[OafDataset].write.mode(SaveMode.Overwrite).save(s"$entityPath/dataset") - + val rddDataset = spark.sparkContext + .textFile(s"$sourcePath/dataset") + .map(s => mapper.readValue(s, classOf[OafDataset])) + .filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false) + spark + .createDataset(rddDataset) + .as[OafDataset] + .write + .mode(SaveMode.Overwrite) + .save(s"$entityPath/dataset") log.info("Converting publication") - val rddPublication =spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])).filter(r=> r.getDataInfo!= null && r.getDataInfo.getDeletedbyinference == false) - spark.createDataset(rddPublication).as[Publication].write.mode(SaveMode.Overwrite).save(s"$entityPath/publication") + val rddPublication = spark.sparkContext + .textFile(s"$sourcePath/publication") + .map(s => mapper.readValue(s, classOf[Publication])) + .filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false) + spark + .createDataset(rddPublication) + .as[Publication] + .write + .mode(SaveMode.Overwrite) + .save(s"$entityPath/publication") log.info("Converting software") - val rddSoftware =spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])).filter(r=> r.getDataInfo!= null && r.getDataInfo.getDeletedbyinference == false) - spark.createDataset(rddSoftware).as[Software].write.mode(SaveMode.Overwrite).save(s"$entityPath/software") + val rddSoftware = spark.sparkContext + .textFile(s"$sourcePath/software") + .map(s => mapper.readValue(s, classOf[Software])) + .filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false) + spark + .createDataset(rddSoftware) + .as[Software] + .write + .mode(SaveMode.Overwrite) + .save(s"$entityPath/software") log.info("Converting otherresearchproduct") - val rddOtherResearchProduct =spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])).filter(r=> r.getDataInfo!= null && r.getDataInfo.getDeletedbyinference == false) - spark.createDataset(rddOtherResearchProduct).as[OtherResearchProduct].write.mode(SaveMode.Overwrite).save(s"$entityPath/otherresearchproduct") - + val rddOtherResearchProduct = spark.sparkContext + .textFile(s"$sourcePath/otherresearchproduct") + .map(s => mapper.readValue(s, classOf[OtherResearchProduct])) + .filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false) + spark + .createDataset(rddOtherResearchProduct) + .as[OtherResearchProduct] + .write + .mode(SaveMode.Overwrite) + .save(s"$entityPath/otherresearchproduct") log.info("Converting Relation") + val relationSemanticFilter = List( + "cites", + "iscitedby", + "merges", + "ismergedin", + "HasAmongTopNSimilarDocuments", + "IsAmongTopNSimilarDocuments" + ) - val relationSemanticFilter = List("cites", "iscitedby","merges", "ismergedin", "HasAmongTopNSimilarDocuments","IsAmongTopNSimilarDocuments" ) - - val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation") + val rddRelation = spark.sparkContext + .textFile(s"$sourcePath/relation") .map(s => mapper.readValue(s, classOf[Relation])) - .filter(r=> r.getDataInfo!= null && r.getDataInfo.getDeletedbyinference == false) - .filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50")) + .filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false) + .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) .filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") - } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala index ed88cfaa6..9d57e5869 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.sx.graph import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset,_} +import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -13,82 +13,131 @@ object SparkCreateInputGraph { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/extract_entities_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/extract_entities_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() val resultObject = List( ("publication", classOf[Publication]), ("dataset", classOf[OafDataset]), ("software", classOf[Software]), ("otherResearchProduct", classOf[OtherResearchProduct]) - ) implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) - implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) + implicit val orpEncoder: Encoder[OtherResearchProduct] = + Encoders.kryo(classOf[OtherResearchProduct]) implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) - val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - val oafDs: Dataset[Oaf] = spark.read.load(s"$sourcePath/*").as[Oaf] - log.info("Extract Publication") - oafDs.filter(o => o.isInstanceOf[Publication]).map(p => p.asInstanceOf[Publication]).write.mode(SaveMode.Overwrite).save(s"$targetPath/extracted/publication") + oafDs + .filter(o => o.isInstanceOf[Publication]) + .map(p => p.asInstanceOf[Publication]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/extracted/publication") log.info("Extract dataset") - oafDs.filter(o => o.isInstanceOf[OafDataset]).map(p => p.asInstanceOf[OafDataset]).write.mode(SaveMode.Overwrite).save(s"$targetPath/extracted/dataset") + oafDs + .filter(o => o.isInstanceOf[OafDataset]) + .map(p => p.asInstanceOf[OafDataset]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/extracted/dataset") log.info("Extract software") - oafDs.filter(o => o.isInstanceOf[Software]).map(p => p.asInstanceOf[Software]).write.mode(SaveMode.Overwrite).save(s"$targetPath/extracted/software") + oafDs + .filter(o => o.isInstanceOf[Software]) + .map(p => p.asInstanceOf[Software]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/extracted/software") log.info("Extract otherResearchProduct") - oafDs.filter(o => o.isInstanceOf[OtherResearchProduct]).map(p => p.asInstanceOf[OtherResearchProduct]).write.mode(SaveMode.Overwrite).save(s"$targetPath/extracted/otherResearchProduct") + oafDs + .filter(o => o.isInstanceOf[OtherResearchProduct]) + .map(p => p.asInstanceOf[OtherResearchProduct]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/extracted/otherResearchProduct") log.info("Extract Relation") - oafDs.filter(o => o.isInstanceOf[Relation]).map(p => p.asInstanceOf[Relation]).write.mode(SaveMode.Overwrite).save(s"$targetPath/extracted/relation") + oafDs + .filter(o => o.isInstanceOf[Relation]) + .map(p => p.asInstanceOf[Relation]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/extracted/relation") resultObject.foreach { r => log.info(s"Make ${r._1} unique") - makeDatasetUnique(s"$targetPath/extracted/${r._1}", s"$targetPath/preprocess/${r._1}", spark, r._2) + makeDatasetUnique( + s"$targetPath/extracted/${r._1}", + s"$targetPath/preprocess/${r._1}", + spark, + r._2 + ) } } - - def extractEntities[T <: Oaf](oafDs: Dataset[Oaf], targetPath: String, clazz: Class[T], log: Logger): Unit = { + def extractEntities[T <: Oaf]( + oafDs: Dataset[Oaf], + targetPath: String, + clazz: Class[T], + log: Logger + ): Unit = { implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) log.info(s"Extract ${clazz.getSimpleName}") - oafDs.filter(o => o.isInstanceOf[T]).map(p => p.asInstanceOf[T]).write.mode(SaveMode.Overwrite).save(targetPath) + oafDs + .filter(o => o.isInstanceOf[T]) + .map(p => p.asInstanceOf[T]) + .write + .mode(SaveMode.Overwrite) + .save(targetPath) } - - def makeDatasetUnique[T <: Result](sourcePath: String, targetPath: String, spark: SparkSession, clazz: Class[T]): Unit = { + def makeDatasetUnique[T <: Result]( + sourcePath: String, + targetPath: String, + spark: SparkSession, + clazz: Class[T] + ): Unit = { import spark.implicits._ implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) val ds: Dataset[T] = spark.read.load(sourcePath).as[T] - ds.groupByKey(_.getId).reduceGroups { (x, y) => - x.mergeFrom(y) - x - }.map(_._2).write.mode(SaveMode.Overwrite).save(targetPath) + ds.groupByKey(_.getId) + .reduceGroups { (x, y) => + x.mergeFrom(y) + x + } + .map(_._2) + .write + .mode(SaveMode.Overwrite) + .save(targetPath) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala index 9930c57af..af19b9698 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala @@ -17,14 +17,19 @@ object SparkCreateScholix { def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/create_scholix_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/create_scholix_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val relationPath = parser.get("relationPath") log.info(s"relationPath -> $relationPath") @@ -33,37 +38,46 @@ object SparkCreateScholix { val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - implicit val relEncoder: Encoder[Relation] = Encoders.kryo[Relation] implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] import spark.implicits._ - - val relationDS: Dataset[(String, Relation)] = spark.read.load(relationPath).as[Relation] - .filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + val relationDS: Dataset[(String, Relation)] = spark.read + .load(relationPath) + .as[Relation] + .filter(r => + (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase + .contains("merge") + ) .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder)) - val summaryDS: Dataset[(String, ScholixSummary)] = spark.read.load(summaryPath).as[ScholixSummary] + val summaryDS: Dataset[(String, ScholixSummary)] = spark.read + .load(summaryPath) + .as[ScholixSummary] .map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, summaryEncoder)) - - relationDS.joinWith(summaryDS, relationDS("_1").equalTo(summaryDS("_1")), "left") + relationDS + .joinWith(summaryDS, relationDS("_1").equalTo(summaryDS("_1")), "left") .map { input: ((String, Relation), (String, ScholixSummary)) => if (input._1 != null && input._2 != null) { val rel: Relation = input._1._2 val source: ScholixSummary = input._2._2 (rel.getTarget, ScholixUtils.scholixFromSource(rel, source)) - } - else null + } else null }(Encoders.tuple(Encoders.STRING, scholixEncoder)) .filter(r => r != null) - .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_from_source") + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/scholix_from_source") - val scholixSource: Dataset[(String, Scholix)] = spark.read.load(s"$targetPath/scholix_from_source").as[(String, Scholix)](Encoders.tuple(Encoders.STRING, scholixEncoder)) + val scholixSource: Dataset[(String, Scholix)] = spark.read + .load(s"$targetPath/scholix_from_source") + .as[(String, Scholix)](Encoders.tuple(Encoders.STRING, scholixEncoder)) - scholixSource.joinWith(summaryDS, scholixSource("_1").equalTo(summaryDS("_1")), "left") + scholixSource + .joinWith(summaryDS, scholixSource("_1").equalTo(summaryDS("_1")), "left") .map { input: ((String, Scholix), (String, ScholixSummary)) => if (input._2 == null) { null @@ -72,40 +86,73 @@ object SparkCreateScholix { val target: ScholixSummary = input._2._2 ScholixUtils.generateCompleteScholix(s, target) } - }.filter(s => s != null).write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_one_verse") + } + .filter(s => s != null) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/scholix_one_verse") + val scholix_o_v: Dataset[Scholix] = + spark.read.load(s"$targetPath/scholix_one_verse").as[Scholix] - val scholix_o_v: Dataset[Scholix] = spark.read.load(s"$targetPath/scholix_one_verse").as[Scholix] - - scholix_o_v.flatMap(s => List(s, ScholixUtils.createInverseScholixRelation(s))).as[Scholix] + scholix_o_v + .flatMap(s => List(s, ScholixUtils.createInverseScholixRelation(s))) + .as[Scholix] .map(s => (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, scholixEncoder)) .groupByKey(_._1) .agg(ScholixUtils.scholixAggregator.toColumn) .map(s => s._2) - .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix") + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/scholix") val scholix_final: Dataset[Scholix] = spark.read.load(s"$targetPath/scholix").as[Scholix] - val stats: Dataset[(String, String, Long)] = scholix_final.map(s => (s.getSource.getDnetIdentifier, s.getTarget.getObjectType)).groupBy("_1", "_2").agg(count("_1")).as[(String, String, Long)] - + val stats: Dataset[(String, String, Long)] = scholix_final + .map(s => (s.getSource.getDnetIdentifier, s.getTarget.getObjectType)) + .groupBy("_1", "_2") + .agg(count("_1")) + .as[(String, String, Long)] stats - .map(s => RelatedEntities(s._1, if ("dataset".equalsIgnoreCase(s._2)) s._3 else 0, if ("publication".equalsIgnoreCase(s._2)) s._3 else 0)) + .map(s => + RelatedEntities( + s._1, + if ("dataset".equalsIgnoreCase(s._2)) s._3 else 0, + if ("publication".equalsIgnoreCase(s._2)) s._3 else 0 + ) + ) .groupByKey(_.id) - .reduceGroups((a, b) => RelatedEntities(a.id, a.relatedDataset + b.relatedDataset, a.relatedPublication + b.relatedPublication)) + .reduceGroups((a, b) => + RelatedEntities( + a.id, + a.relatedDataset + b.relatedDataset, + a.relatedPublication + b.relatedPublication + ) + ) .map(_._2) - .write.mode(SaveMode.Overwrite).save(s"$targetPath/related_entities") + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/related_entities") - val relatedEntitiesDS: Dataset[RelatedEntities] = spark.read.load(s"$targetPath/related_entities").as[RelatedEntities].filter(r => r.relatedPublication > 0 || r.relatedDataset > 0) + val relatedEntitiesDS: Dataset[RelatedEntities] = spark.read + .load(s"$targetPath/related_entities") + .as[RelatedEntities] + .filter(r => r.relatedPublication > 0 || r.relatedDataset > 0) - relatedEntitiesDS.joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner").map { i => - val re = i._1 - val sum = i._2._2 + relatedEntitiesDS + .joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner") + .map { i => + val re = i._1 + val sum = i._2._2 - sum.setRelatedDatasets(re.relatedDataset) - sum.setRelatedPublications(re.relatedPublication) - sum - }.write.mode(SaveMode.Overwrite).save(s"${summaryPath}_filtered") + sum.setRelatedDatasets(re.relatedDataset) + sum.setRelatedPublications(re.relatedPublication) + sum + } + .write + .mode(SaveMode.Overwrite) + .save(s"${summaryPath}_filtered") } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala index 4274cae5a..6d489e8cb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala @@ -14,14 +14,19 @@ object SparkCreateSummaryObject { def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/create_summaries_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/create_summaries_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") @@ -33,10 +38,17 @@ object SparkCreateSummaryObject { implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] + val ds: Dataset[Result] = spark.read + .load(s"$sourcePath/*") + .as[Result] + .filter(r => r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) - val ds: Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result].filter(r => r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) - - ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s != null).write.mode(SaveMode.Overwrite).save(targetPath) + ds.repartition(6000) + .map(r => ScholixUtils.resultToSummary(r)) + .filter(s => s != null) + .write + .mode(SaveMode.Overwrite) + .save(targetPath) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala index c70397d04..00d0d66c4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala @@ -10,61 +10,89 @@ import java.util.regex.Pattern import scala.language.postfixOps import scala.xml.{Elem, Node, XML} -case class PangaeaDataModel(identifier:String, title:List[String], objectType:List[String], creator:List[String], - publisher:List[String], dataCenter :List[String],subject :List[String], language:String, - rights:String, parent:String,relation :List[String],linkage:List[(String,String)] ) {} +case class PangaeaDataModel( + identifier: String, + title: List[String], + objectType: List[String], + creator: List[String], + publisher: List[String], + dataCenter: List[String], + subject: List[String], + language: String, + rights: String, + parent: String, + relation: List[String], + linkage: List[(String, String)] +) {} object PangaeaUtils { - - def toDataset(input:String):PangaeaDataModel = { + def toDataset(input: String): PangaeaDataModel = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val xml= (json \ "xml").extract[String] + val xml = (json \ "xml").extract[String] parseXml(xml) } - def findDOIInRelation( input:List[String]):List[String] = { + def findDOIInRelation(input: List[String]): List[String] = { val pattern = Pattern.compile("\\b(10[.][0-9]{4,}(?:[.][0-9]+)*\\/(?:(?![\"&\\'<>])\\S)+)\\b") - input.map(i => { - val matcher = pattern.matcher(i) - if (matcher.find()) - matcher.group(0) - else - null - }).filter(i => i!= null) + input + .map(i => { + val matcher = pattern.matcher(i) + if (matcher.find()) + matcher.group(0) + else + null + }) + .filter(i => i != null) } - def attributeOpt(attribute: String, node:Node): Option[String] = + def attributeOpt(attribute: String, node: Node): Option[String] = node.attribute(attribute) flatMap (_.headOption) map (_.text) - def extractLinkage(node:Elem):List[(String, String)] = { - (node \ "linkage").map(n =>(attributeOpt("type",n), n.text)).filter(t => t._1.isDefined).map(t=> (t._1.get, t._2))(collection.breakOut) + def extractLinkage(node: Elem): List[(String, String)] = { + (node \ "linkage") + .map(n => (attributeOpt("type", n), n.text)) + .filter(t => t._1.isDefined) + .map(t => (t._1.get, t._2))(collection.breakOut) } - def parseXml(input:String):PangaeaDataModel = { + def parseXml(input: String): PangaeaDataModel = { val xml = XML.loadString(input) val identifier = (xml \ "identifier").text - val title :List[String] = (xml \ "title").map(n => n.text)(collection.breakOut) - val pType :List[String] = (xml \ "type").map(n => n.text)(collection.breakOut) - val creators:List[String] = (xml \ "creator").map(n => n.text)(collection.breakOut) - val publisher :List[String] = (xml \ "publisher").map(n => n.text)(collection.breakOut) - val dataCenter :List[String] = (xml \ "dataCenter").map(n => n.text)(collection.breakOut) - val subject :List[String] = (xml \ "subject").map(n => n.text)(collection.breakOut) - val language= (xml \ "language").text - val rights= (xml \ "rights").text - val parentIdentifier= (xml \ "parentIdentifier").text - val relation :List[String] = (xml \ "relation").map(n => n.text)(collection.breakOut) + val title: List[String] = (xml \ "title").map(n => n.text)(collection.breakOut) + val pType: List[String] = (xml \ "type").map(n => n.text)(collection.breakOut) + val creators: List[String] = (xml \ "creator").map(n => n.text)(collection.breakOut) + val publisher: List[String] = (xml \ "publisher").map(n => n.text)(collection.breakOut) + val dataCenter: List[String] = (xml \ "dataCenter").map(n => n.text)(collection.breakOut) + val subject: List[String] = (xml \ "subject").map(n => n.text)(collection.breakOut) + val language = (xml \ "language").text + val rights = (xml \ "rights").text + val parentIdentifier = (xml \ "parentIdentifier").text + val relation: List[String] = (xml \ "relation").map(n => n.text)(collection.breakOut) val relationFiltered = findDOIInRelation(relation) - val linkage:List[(String,String)] = extractLinkage(xml) + val linkage: List[(String, String)] = extractLinkage(xml) - PangaeaDataModel(identifier,title, pType, creators,publisher, dataCenter, subject, language, rights, parentIdentifier, relationFiltered, linkage) + PangaeaDataModel( + identifier, + title, + pType, + creators, + publisher, + dataCenter, + subject, + language, + rights, + parentIdentifier, + relationFiltered, + linkage + ) } - - def getDatasetAggregator(): Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] = new Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel]{ - + def getDatasetAggregator() + : Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] = + new Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] { override def zero: PangaeaDataModel = null @@ -77,7 +105,7 @@ object PangaeaUtils { else { if (b.title != null && b.title.nonEmpty) b - else + else a._2 } @@ -106,7 +134,4 @@ object PangaeaUtils { override def outputEncoder: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel] } - - - -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala index 2717b7b80..8ff8a8b1a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala @@ -11,20 +11,25 @@ import scala.io.Source object SparkGeneratePanagaeaDataset { - def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/pangaea/pangaea_to_dataset.json")).mkString) + val parser = new ArgumentApplicationParser( + Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/pangaea/pangaea_to_dataset.json") + ) + .mkString + ) parser.parseArgument(args) - val spark: SparkSession = SparkSession .builder() .config(conf) .appName(SparkGeneratePanagaeaDataset.getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() parser.getObjectMap.asScala.foreach(s => logger.info(s"${s._1} -> ${s._2}")) logger.info("Converting sequential file into Dataset") @@ -34,16 +39,20 @@ object SparkGeneratePanagaeaDataset { implicit val pangaeaEncoders: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel] - val inputRDD: RDD[PangaeaDataModel] = sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s)) + val inputRDD: RDD[PangaeaDataModel] = + sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s)) - spark.createDataset(inputRDD).as[PangaeaDataModel] + spark + .createDataset(inputRDD) + .as[PangaeaDataModel] .map(s => (s.identifier, s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders)) .groupByKey(_._1)(Encoders.STRING) .agg(PangaeaUtils.getDatasetAggregator().toColumn) .map(s => s._2) - .write.mode(SaveMode.Overwrite).save(s"$workingPath/dataset") + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/dataset") } - } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala index 4613d5636..7e41e993f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -9,10 +9,10 @@ import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.api.Test -class TestApply extends java.io.Serializable{ +class TestApply extends java.io.Serializable { @Test - def testApplyOnResult (): Unit = { + def testApplyOnResult(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -25,54 +25,104 @@ class TestApply extends java.io.Serializable{ val pub = getClass.getResource("publication.json").getPath val hbm = getClass.getResource("preparedInfo.json").getPath - val mapper:ObjectMapper = new ObjectMapper() + val mapper: ObjectMapper = new ObjectMapper() implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) implicit val mapEncoderPubInfo: Encoder[Publication] = Encoders.bean(classOf[Publication]) - - val pub_ds :Dataset[Publication] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[Publication])) - val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) - + val pub_ds: Dataset[Publication] = + spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[Publication])) + val hbm_ds: Dataset[EntityInfo] = + spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) assertEquals(13, pub_ds.count()) - val ds:Dataset[Publication] = SparkApplyHostedByMapToResult.applyHBtoPubs(hbm_ds, pub_ds) + val ds: Dataset[Publication] = SparkApplyHostedByMapToResult.applyHBtoPubs(hbm_ds, pub_ds) - assertEquals(13, ds.count) + assertEquals(13, ds.count) - val temp: Dataset[(Publication, Publication)] = pub_ds.joinWith(ds, pub_ds.col("id").equalTo(ds.col("id")), "left") + val temp: Dataset[(Publication, Publication)] = + pub_ds.joinWith(ds, pub_ds.col("id").equalTo(ds.col("id")), "left") assertEquals(13, temp.count()) temp.foreach(t2 => { - val pb : Publication = t2._1 - val pa : Publication = t2._2 + val pb: Publication = t2._1 + val pa: Publication = t2._2 assertEquals(1, pa.getInstance().size()) assertEquals(1, pb.getInstance().size()) assertTrue(t2._1.getId.equals(t2._2.getId)) - if(pb.getId.equals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9")){ - assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735")) + if (pb.getId.equals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9")) { + assertTrue( + pa.getInstance() + .get(0) + .getHostedby + .getKey + .equals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735") + ) assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals("Academic Therapy")) assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) - assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.gold)) + assertTrue( + pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.gold) + ) assertTrue(pa.getBestaccessright.getClassid.equals("OPEN")) assertTrue(pa.getBestaccessright.getClassname.equals("Open Access")) - - assertTrue(pb.getInstance().get(0).getHostedby.getKey.equals("10|openaire____::0b74b6a356bbf23c245f9ae9a748745c")) - assertTrue(pb.getInstance().get(0).getHostedby.getValue.equals("Revistas de investigación Universidad Nacional Mayor de San Marcos")) + assertTrue( + pb.getInstance() + .get(0) + .getHostedby + .getKey + .equals("10|openaire____::0b74b6a356bbf23c245f9ae9a748745c") + ) + assertTrue( + pb.getInstance() + .get(0) + .getHostedby + .getValue + .equals("Revistas de investigación Universidad Nacional Mayor de San Marcos") + ) assertTrue(pb.getInstance().get(0).getAccessright.getClassname.equals("not available")) assertTrue(pb.getInstance().get(0).getAccessright.getClassid.equals("UNKNOWN")) assertTrue(pb.getInstance().get(0).getAccessright.getOpenAccessRoute == null) assertTrue(pb.getBestaccessright.getClassid.equals("UNKNOWN")) assertTrue(pb.getBestaccessright.getClassname.equals("not available")) - }else{ - assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals(pb.getInstance().get(0).getHostedby.getKey)) - assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals(pb.getInstance().get(0).getHostedby.getValue)) - assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals(pb.getInstance().get(0).getAccessright.getClassid)) - assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals(pb.getInstance().get(0).getAccessright.getClassname)) - assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute == pb.getInstance().get(0).getAccessright.getOpenAccessRoute) + } else { + assertTrue( + pa.getInstance() + .get(0) + .getHostedby + .getKey + .equals(pb.getInstance().get(0).getHostedby.getKey) + ) + assertTrue( + pa.getInstance() + .get(0) + .getHostedby + .getValue + .equals(pb.getInstance().get(0).getHostedby.getValue) + ) + assertTrue( + pa.getInstance() + .get(0) + .getAccessright + .getClassid + .equals(pb.getInstance().get(0).getAccessright.getClassid) + ) + assertTrue( + pa.getInstance() + .get(0) + .getAccessright + .getClassname + .equals(pb.getInstance().get(0).getAccessright.getClassname) + ) + assertTrue( + pa.getInstance().get(0).getAccessright.getOpenAccessRoute == pb + .getInstance() + .get(0) + .getAccessright + .getOpenAccessRoute + ) } }) @@ -80,9 +130,8 @@ class TestApply extends java.io.Serializable{ spark.close() } - @Test - def testApplyOnDatasource():Unit = { + def testApplyOnDatasource(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -95,38 +144,49 @@ class TestApply extends java.io.Serializable{ val dats = getClass.getResource("datasource.json").getPath val hbm = getClass.getResource("preparedInfo2.json").getPath - val mapper:ObjectMapper = new ObjectMapper() + val mapper: ObjectMapper = new ObjectMapper() implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) implicit val mapEncoderPubInfo: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) - - val dats_ds :Dataset[Datasource] = spark.read.textFile(dats).map(p => mapper.readValue(p, classOf[Datasource])) - val hbm_ds :Dataset[EntityInfo] = Aggregators.datasourceToSingleId(spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo]))) - + val dats_ds: Dataset[Datasource] = + spark.read.textFile(dats).map(p => mapper.readValue(p, classOf[Datasource])) + val hbm_ds: Dataset[EntityInfo] = Aggregators.datasourceToSingleId( + spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + ) assertEquals(10, dats_ds.count()) - val ds:Dataset[Datasource] = SparkApplyHostedByMapToDatasource.applyHBtoDats(hbm_ds, dats_ds) + val ds: Dataset[Datasource] = SparkApplyHostedByMapToDatasource.applyHBtoDats(hbm_ds, dats_ds) - assertEquals(10, ds.count) + assertEquals(10, ds.count) - val temp: Dataset[(Datasource, Datasource)] = dats_ds.joinWith(ds, dats_ds.col("id").equalTo(ds.col("id")), "left") + val temp: Dataset[(Datasource, Datasource)] = + dats_ds.joinWith(ds, dats_ds.col("id").equalTo(ds.col("id")), "left") assertEquals(10, temp.count()) temp.foreach(t2 => { - val pb : Datasource = t2._1 - val pa : Datasource = t2._2 + val pb: Datasource = t2._1 + val pa: Datasource = t2._2 assertTrue(t2._1.getId.equals(t2._2.getId)) - if(pb.getId.equals("10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d")) { + if (pb.getId.equals("10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d")) { assertTrue(pa.getOpenairecompatibility().getClassid.equals("hostedBy")) - assertTrue(pa.getOpenairecompatibility().getClassname.equals("collected from a compatible aggregator")) + assertTrue( + pa.getOpenairecompatibility() + .getClassname + .equals("collected from a compatible aggregator") + ) assertTrue(pb.getOpenairecompatibility().getClassid.equals(ModelConstants.UNKNOWN)) - } else { - assertTrue(pa.getOpenairecompatibility().getClassid.equals(pb.getOpenairecompatibility.getClassid)) - assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassname)) + assertTrue( + pa.getOpenairecompatibility().getClassid.equals(pb.getOpenairecompatibility.getClassid) + ) + assertTrue( + pa.getOpenairecompatibility() + .getClassname + .equals(pb.getOpenairecompatibility.getClassname) + ) } }) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala index 7abce547f..23b42009e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -1,7 +1,11 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo} +import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{ + joinResHBM, + prepareResultInfo, + toEntityInfo +} import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} @@ -9,9 +13,9 @@ import org.json4s.DefaultFormats import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.api.Test -class TestPrepare extends java.io.Serializable{ +class TestPrepare extends java.io.Serializable { - def getString(input:HostedByItemType):String = { + def getString(input: HostedByItemType): String = { import org.json4s.jackson.Serialization.write implicit val formats = DefaultFormats @@ -19,9 +23,8 @@ class TestPrepare extends java.io.Serializable{ write(input) } - @Test - def testHostedByMaptoEntityInfo() : Unit = { + def testHostedByMaptoEntityInfo(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -33,23 +36,23 @@ class TestPrepare extends java.io.Serializable{ .getOrCreate() val hbm = getClass.getResource("hostedbymap.json").getPath - import spark.implicits._ - val mapper:ObjectMapper = new ObjectMapper() + val mapper: ObjectMapper = new ObjectMapper() implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - val ds :Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hbm)).map(toEntityInfo) + val ds: Dataset[EntityInfo] = + spark.createDataset(spark.sparkContext.textFile(hbm)).map(toEntityInfo) ds.foreach(e => println(mapper.writeValueAsString(e))) - assertEquals(20, ds.count) + assertEquals(20, ds.count) spark.close() } @Test - def testPublicationtoEntityInfo() : Unit = { + def testPublicationtoEntityInfo(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -61,24 +64,30 @@ class TestPrepare extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("publication.json").getPath - val mapper:ObjectMapper = new ObjectMapper() + val mapper: ObjectMapper = new ObjectMapper() implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - val ds :Dataset[EntityInfo] = prepareResultInfo(spark, path) + val ds: Dataset[EntityInfo] = prepareResultInfo(spark, path) ds.foreach(e => println(mapper.writeValueAsString(e))) - assertEquals(2, ds.count) + assertEquals(2, ds.count) - assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournalId.equals("1728-5852")).first().getId) - assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournalId.equals("0001-396X")).first().getId) + assertEquals( + "50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", + ds.filter(ei => ei.getJournalId.equals("1728-5852")).first().getId + ) + assertEquals( + "50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", + ds.filter(ei => ei.getJournalId.equals("0001-396X")).first().getId + ) spark.close() } @Test - def testJoinResHBM (): Unit = { + def testJoinResHBM(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -91,18 +100,20 @@ class TestPrepare extends java.io.Serializable{ val pub = getClass.getResource("iteminfofrompublication").getPath val hbm = getClass.getResource("iteminfofromhostedbymap.json").getPath - val mapper:ObjectMapper = new ObjectMapper() + val mapper: ObjectMapper = new ObjectMapper() implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - val pub_ds :Dataset[EntityInfo] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) - val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + val pub_ds: Dataset[EntityInfo] = + spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds: Dataset[EntityInfo] = + spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) val ds: Dataset[EntityInfo] = joinResHBM(pub_ds, hbm_ds) - assertEquals(1, ds.count) + assertEquals(1, ds.count) - val ei:EntityInfo = ds.first() + val ei: EntityInfo = ds.first() assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHostedById) @@ -114,7 +125,7 @@ class TestPrepare extends java.io.Serializable{ } @Test - def testJoinResHBM2 (): Unit = { + def testJoinResHBM2(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -127,18 +138,20 @@ class TestPrepare extends java.io.Serializable{ val pub = getClass.getResource("iteminfofrompublication2").getPath val hbm = getClass.getResource("iteminfofromhostedbymap2.json").getPath - val mapper:ObjectMapper = new ObjectMapper() + val mapper: ObjectMapper = new ObjectMapper() implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - val pub_ds :Dataset[EntityInfo] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) - val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + val pub_ds: Dataset[EntityInfo] = + spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds: Dataset[EntityInfo] = + spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) val ds: Dataset[EntityInfo] = joinResHBM(pub_ds, hbm_ds) - assertEquals(1, ds.count) + assertEquals(1, ds.count) - val ei:EntityInfo = ds.first() + val ei: EntityInfo = ds.first() assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHostedById) @@ -150,6 +163,4 @@ class TestPrepare extends java.io.Serializable{ spark.close() } - - } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 0922f2e19..cc7131119 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -8,20 +8,19 @@ import org.json4s.jackson.Serialization.write import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test -class TestPreprocess extends java.io.Serializable{ +class TestPreprocess extends java.io.Serializable { implicit val mapEncoderDats: Encoder[Datasource] = Encoders.kryo[Datasource] implicit val schema = Encoders.product[HostedByInfo] - - def toHBIString (hbi:HostedByItemType): String = { + def toHBIString(hbi: HostedByItemType): String = { implicit val formats = DefaultFormats write(hbi) } @Test - def readDatasource():Unit = { + def readDatasource(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -33,29 +32,41 @@ class TestPreprocess extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("datasource.json").getPath - val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.oaHostedByDataset(spark, path) + val ds: Dataset[HostedByItemType] = SparkProduceHostedByMap.oaHostedByDataset(spark, path) - assertEquals(9, ds.count) + assertEquals(9, ds.count) assertEquals(8, ds.filter(hbi => !hbi.issn.equals("")).count) assertEquals(5, ds.filter(hbi => !hbi.eissn.equals("")).count) assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count) - assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + assertEquals( + 0, + ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count + ) assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365")).count == 1) assertTrue(ds.filter(hbi => hbi.eissn.equals("2253-900X")).count == 1) - assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1) - assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")).count == 1) - assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.id.equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c")).count == 1) + assertTrue( + ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1 + ) + assertTrue( + ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")) + .count == 1 + ) + assertTrue( + ds.filter(hbi => + hbi.issn.equals("0212-8365") && hbi.id + .equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c") + ).count == 1 + ) ds.foreach(hbi => assertTrue(hbi.id.startsWith("10|"))) ds.foreach(hbi => println(toHBIString(hbi))) spark.close() } - @Test - def readGold():Unit = { + def readGold(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -67,8 +78,7 @@ class TestPreprocess extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("unibi_transformed.json").getPath - - val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.goldHostedByDataset(spark, path) + val ds: Dataset[HostedByItemType] = SparkProduceHostedByMap.goldHostedByDataset(spark, path) assertEquals(29, ds.count) @@ -76,9 +86,17 @@ class TestPreprocess extends java.io.Serializable{ assertEquals(0, ds.filter(hbi => !hbi.eissn.equals("")).count) assertEquals(29, ds.filter(hbi => !hbi.lissn.equals("")).count) - assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + assertEquals( + 0, + ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count + ) - assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().officialname.equals("European journal of sustainable development.")) + assertTrue( + ds.filter(hbi => hbi.issn.equals("2239-6101")) + .first() + .officialname + .equals("European journal of sustainable development.") + ) assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().lissn.equals("2239-5938")) assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).count == 1) ds.foreach(hbi => assertTrue(hbi.id.equals(Constants.UNIBI))) @@ -88,7 +106,7 @@ class TestPreprocess extends java.io.Serializable{ } @Test - def readDoaj():Unit = { + def readDoaj(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -100,7 +118,7 @@ class TestPreprocess extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("doaj_transformed.json").getPath - val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.doajHostedByDataset(spark, path) + val ds: Dataset[HostedByItemType] = SparkProduceHostedByMap.doajHostedByDataset(spark, path) assertEquals(25, ds.count) @@ -108,9 +126,17 @@ class TestPreprocess extends java.io.Serializable{ assertEquals(21, ds.filter(hbi => !hbi.eissn.equals("")).count) assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count) - assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + assertEquals( + 0, + ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count + ) - assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().officialname.equals("Journal of Space Technology")) + assertTrue( + ds.filter(hbi => hbi.issn.equals("2077-3099")) + .first() + .officialname + .equals("Journal of Space Technology") + ) assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().eissn.equals("2411-5029")) assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).count == 1) assertTrue(ds.filter(hbi => hbi.eissn.equals("2077-2955")).first().issn.equals("")) @@ -121,7 +147,7 @@ class TestPreprocess extends java.io.Serializable{ } @Test - def testAggregator() : Unit = { + def testAggregator(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") @@ -133,22 +159,40 @@ class TestPreprocess extends java.io.Serializable{ .config(conf) .getOrCreate() - - val tmp = SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) - .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath)) - .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath)) - .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) + val tmp = SparkProduceHostedByMap + .oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) + .union( + SparkProduceHostedByMap + .goldHostedByDataset(spark, getClass.getResource("unibi_transformed.json").getPath) + ) + .union( + SparkProduceHostedByMap + .doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath) + ) + .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))( + Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) + ) assertEquals(106, tmp.count) assertEquals(82, tmp.map(i => i._1)(Encoders.STRING).distinct().count) + val ds: Dataset[(String, HostedByItemType)] = Aggregators.explodeHostedByItemType( + SparkProduceHostedByMap + .oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) + .union( + SparkProduceHostedByMap + .goldHostedByDataset(spark, getClass.getResource("unibi_transformed.json").getPath) + ) + .union( + SparkProduceHostedByMap + .doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath) + ) + .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))( + Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) + ) + ) - val ds :Dataset[(String, HostedByItemType)] = Aggregators.explodeHostedByItemType(SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) - .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath)) - .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath)) - .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]))) - - assertEquals(82, ds.count) + assertEquals(82, ds.count) assertEquals(13, ds.filter(i => i._2.id.startsWith("10|")).count) @@ -156,14 +200,13 @@ class TestPreprocess extends java.io.Serializable{ assertTrue(ds.filter(i => i._1.equals("2077-3757")).first()._2.openAccess) assertEquals(1, ds.filter(i => i._1.equals("2077-3757")).count) - val hbmap : Dataset[String] = ds.filter(hbi => hbi._2.id.startsWith("10|")).map(SparkProduceHostedByMap.toHostedByMap)(Encoders.STRING) + val hbmap: Dataset[String] = ds + .filter(hbi => hbi._2.id.startsWith("10|")) + .map(SparkProduceHostedByMap.toHostedByMap)(Encoders.STRING) hbmap.foreach(entry => println(entry)) spark.close() } - - - } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index c22243f94..3cdc370d6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -1,6 +1,5 @@ package eu.dnetlib.dhp.oa.graph.resolution - import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.common.EntityType import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils @@ -19,174 +18,241 @@ import scala.io.Source @TestInstance(Lifecycle.PER_CLASS) class ResolveEntitiesTest extends Serializable { - var workingDir:Path = null + var workingDir: Path = null val FAKE_TITLE = "FAKETITLE" val FAKE_SUBJECT = "FAKESUBJECT" - var sparkSession:Option[SparkSession] = None - + var sparkSession: Option[SparkSession] = None @BeforeAll - def setUp() :Unit = { + def setUp(): Unit = { workingDir = Files.createTempDirectory(getClass.getSimpleName) val conf = new SparkConf() - sparkSession = Some(SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master("local[*]").getOrCreate()) + sparkSession = Some( + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master("local[*]") + .getOrCreate() + ) populateDatasets(sparkSession.get) generateUpdates(sparkSession.get) } - @AfterAll - def tearDown():Unit = { + def tearDown(): Unit = { FileUtils.deleteDirectory(workingDir.toFile) sparkSession.get.stop() - } - - def generateUpdates(spark:SparkSession):Unit = { + def generateUpdates(spark: SparkSession): Unit = { val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString + val pids: List[String] = template.lines + .map { id => + val r = new Result + r.setId(id.toLowerCase.trim) + r.setSubject( + List( + OafMapperUtils.structuredProperty( + FAKE_SUBJECT, + OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"), + null + ) + ).asJava + ) + r.setTitle( + List( + OafMapperUtils.structuredProperty( + FAKE_TITLE, + OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"), + null + ) + ).asJava + ) + r + } + .map { r => + val mapper = new ObjectMapper() - val pids:List[String] = template.lines.map{id => - val r = new Result - r.setId(id.toLowerCase.trim) - r.setSubject(List(OafMapperUtils.structuredProperty(FAKE_SUBJECT, OafMapperUtils.qualifier("fos","fosCS", "fossSchema", "fossiFIgo"), null)).asJava) - r.setTitle(List(OafMapperUtils.structuredProperty(FAKE_TITLE, OafMapperUtils.qualifier("fos","fosCS", "fossSchema", "fossiFIgo"), null)).asJava) - r - }.map{r => - val mapper = new ObjectMapper() + mapper.writeValueAsString(r) + } + .toList - mapper.writeValueAsString(r)}.toList - - - val sc =spark.sparkContext + val sc = spark.sparkContext println(sc.parallelize(pids).count()) - spark.createDataset(sc.parallelize(pids))(Encoders.STRING).write.mode(SaveMode.Overwrite).option("compression", "gzip").text(s"$workingDir/updates") - - - - + spark + .createDataset(sc.parallelize(pids))(Encoders.STRING) + .write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(s"$workingDir/updates") import spark.implicits._ implicit val resEncoder: Encoder[Result] = Encoders.bean(classOf[Result]) - val ds = spark.read.text(s"$workingDir/updates").as[String].map{s => val mapper = new ObjectMapper() - mapper.readValue(s, classOf[Result])}.collect() - - - + val ds = spark.read + .text(s"$workingDir/updates") + .as[String] + .map { s => + val mapper = new ObjectMapper() + mapper.readValue(s, classOf[Result]) + } + .collect() assertEquals(4, ds.length) - ds.foreach{r => assertNotNull(r.getSubject)} - ds.foreach{r => assertEquals(1,r.getSubject.size())} - ds.foreach{r => assertNotNull(r.getTitle)} - ds.foreach{r => assertEquals(1,r.getTitle.size())} + ds.foreach { r => assertNotNull(r.getSubject) } + ds.foreach { r => assertEquals(1, r.getSubject.size()) } + ds.foreach { r => assertNotNull(r.getTitle) } + ds.foreach { r => assertEquals(1, r.getTitle.size()) } - - - ds.flatMap(r => r.getTitle.asScala.map(t => t.getValue)).foreach(t => assertEquals(FAKE_TITLE,t)) - ds.flatMap(r => r.getSubject.asScala.map(t => t.getValue)).foreach(t => assertEquals(FAKE_SUBJECT,t)) + ds.flatMap(r => r.getTitle.asScala.map(t => t.getValue)) + .foreach(t => assertEquals(FAKE_TITLE, t)) + ds.flatMap(r => r.getSubject.asScala.map(t => t.getValue)) + .foreach(t => assertEquals(FAKE_SUBJECT, t)) println("generated Updates") } - - def populateDatasets(spark:SparkSession):Unit = { + def populateDatasets(spark: SparkSession): Unit = { import spark.implicits._ - val entities =SparkResolveEntities.entities + val entities = SparkResolveEntities.entities - entities.foreach{ - e => - val template = Source.fromInputStream(this.getClass.getResourceAsStream(s"$e")).mkString - spark.createDataset(spark.sparkContext.parallelize(template.lines.toList)).as[String].write.option("compression", "gzip").text(s"$workingDir/graph/$e") - println(s"Created Dataset $e") + entities.foreach { e => + val template = Source.fromInputStream(this.getClass.getResourceAsStream(s"$e")).mkString + spark + .createDataset(spark.sparkContext.parallelize(template.lines.toList)) + .as[String] + .write + .option("compression", "gzip") + .text(s"$workingDir/graph/$e") + println(s"Created Dataset $e") } - SparkResolveRelation.extractPidResolvedTableFromJsonRDD(spark, s"$workingDir/graph", s"$workingDir/work") + SparkResolveRelation.extractPidResolvedTableFromJsonRDD( + spark, + s"$workingDir/graph", + s"$workingDir/work" + ) } - @Test - def testResolution():Unit = { - val spark:SparkSession = sparkSession.get + def testResolution(): Unit = { + val spark: SparkSession = sparkSession.get implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) - SparkResolveEntities.resolveEntities(spark,s"$workingDir/work", s"$workingDir/updates" ) + SparkResolveEntities.resolveEntities(spark, s"$workingDir/work", s"$workingDir/updates") val ds = spark.read.load(s"$workingDir/work/resolvedEntities").as[Result] assertEquals(3, ds.count()) - ds.collect().foreach{ - r => + ds.collect().foreach { r => assertTrue(r.getId.startsWith("50")) } } - - - - private def structuredPContainsValue(l:java.util.List[StructuredProperty], exptectedValue:String):Boolean = { - l.asScala.exists(p =>p.getValue!= null && p.getValue.equalsIgnoreCase(exptectedValue)) + private def structuredPContainsValue( + l: java.util.List[StructuredProperty], + exptectedValue: String + ): Boolean = { + l.asScala.exists(p => p.getValue != null && p.getValue.equalsIgnoreCase(exptectedValue)) } @Test - def testUpdate():Unit = { - val spark:SparkSession = sparkSession.get + def testUpdate(): Unit = { + val spark: SparkSession = sparkSession.get import spark.implicits._ implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) val m = new ObjectMapper() - SparkResolveEntities.resolveEntities(spark,s"$workingDir/work", s"$workingDir/updates" ) - SparkResolveEntities.generateResolvedEntities(spark,s"$workingDir/work",s"$workingDir/graph", s"$workingDir/target" ) - - - - val pubDS:Dataset[Result] = spark.read.text(s"$workingDir/target/publication").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.publication)) - val t = pubDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + SparkResolveEntities.resolveEntities(spark, s"$workingDir/work", s"$workingDir/updates") + SparkResolveEntities.generateResolvedEntities( + spark, + s"$workingDir/work", + s"$workingDir/graph", + s"$workingDir/target" + ) + val pubDS: Dataset[Result] = spark.read + .text(s"$workingDir/target/publication") + .as[String] + .map(s => SparkResolveEntities.deserializeObject(s, EntityType.publication)) + val t = pubDS + .filter(p => p.getTitle != null && p.getSubject != null) + .filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))) + .count() var ct = pubDS.count() - var et = pubDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() + var et = pubDS + .filter(p => + p.getTitle != null && p.getTitle.asScala.forall(t => + t.getValue != null && t.getValue.nonEmpty + ) + ) + .count() assertEquals(ct, et) - - - val datDS:Dataset[Result] = spark.read.text(s"$workingDir/target/dataset").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) - val td = datDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + val datDS: Dataset[Result] = spark.read + .text(s"$workingDir/target/dataset") + .as[String] + .map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) + val td = datDS + .filter(p => p.getTitle != null && p.getSubject != null) + .filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))) + .count() ct = datDS.count() - et = datDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() + et = datDS + .filter(p => + p.getTitle != null && p.getTitle.asScala.forall(t => + t.getValue != null && t.getValue.nonEmpty + ) + ) + .count() assertEquals(ct, et) - - val softDS:Dataset[Result] = spark.read.text(s"$workingDir/target/software").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.software)) - val ts = softDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + val softDS: Dataset[Result] = spark.read + .text(s"$workingDir/target/software") + .as[String] + .map(s => SparkResolveEntities.deserializeObject(s, EntityType.software)) + val ts = softDS + .filter(p => p.getTitle != null && p.getSubject != null) + .filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))) + .count() ct = softDS.count() - et = softDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() + et = softDS + .filter(p => + p.getTitle != null && p.getTitle.asScala.forall(t => + t.getValue != null && t.getValue.nonEmpty + ) + ) + .count() assertEquals(ct, et) - - val orpDS:Dataset[Result] = spark.read.text(s"$workingDir/target/otherresearchproduct").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.otherresearchproduct)) - val to = orpDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() - + val orpDS: Dataset[Result] = spark.read + .text(s"$workingDir/target/otherresearchproduct") + .as[String] + .map(s => SparkResolveEntities.deserializeObject(s, EntityType.otherresearchproduct)) + val to = orpDS + .filter(p => p.getTitle != null && p.getSubject != null) + .filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))) + .count() ct = orpDS.count() - et = orpDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() + et = orpDS + .filter(p => + p.getTitle != null && p.getTitle.asScala.forall(t => + t.getValue != null && t.getValue.nonEmpty + ) + ) + .count() assertEquals(ct, et) - - - - assertEquals(0, t) assertEquals(2, td) assertEquals(1, ts) @@ -194,40 +260,35 @@ class ResolveEntitiesTest extends Serializable { } - - - - @Test - def testMerge():Unit = { + def testMerge(): Unit = { val r = new Result - r.setSubject(List(OafMapperUtils.structuredProperty(FAKE_SUBJECT, OafMapperUtils.qualifier("fos","fosCS", "fossSchema", "fossiFIgo"), null)).asJava) + r.setSubject( + List( + OafMapperUtils.structuredProperty( + FAKE_SUBJECT, + OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"), + null + ) + ).asJava + ) val mapper = new ObjectMapper() - val p = mapper.readValue(Source.fromInputStream(this.getClass.getResourceAsStream(s"publication")).mkString.lines.next(), classOf[Publication]) - + val p = mapper.readValue( + Source + .fromInputStream(this.getClass.getResourceAsStream(s"publication")) + .mkString + .lines + .next(), + classOf[Publication] + ) r.mergeFrom(p) - println(mapper.writeValueAsString(r)) - - - - - - - } - - - - - - - } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/RetrieveDataciteDeltaTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/RetrieveDataciteDeltaTest.scala index c277b0aa1..80ea9d59c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/RetrieveDataciteDeltaTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/RetrieveDataciteDeltaTest.scala @@ -1,26 +1,20 @@ package eu.dnetlib.dhp.sx.graph + import org.junit.jupiter.api.Test import java.text.SimpleDateFormat - - class RetrieveDataciteDeltaTest { @Test def testParsingDate(): Unit = { - val inputDate = "2021-12-02T11:17:36+0000" val t = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ").parse(inputDate).getTime - println(t) - - } - } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala index 04b1f9ecd..6dc945d4b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala @@ -18,78 +18,89 @@ import scala.collection.JavaConverters._ import scala.io.Source @ExtendWith(Array(classOf[MockitoExtension])) -class ScholixGraphTest extends AbstractVocabularyTest{ - +class ScholixGraphTest extends AbstractVocabularyTest { val mapper: ObjectMapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT) - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false) + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) @BeforeEach - def setUp() :Unit = { + def setUp(): Unit = { super.setUpVocabulary() } - @Test - def testExtractPids():Unit = { + def testExtractPids(): Unit = { - val input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/scholix/result.json")).mkString - val res =SparkResolveRelation.extractPidsFromRecord(input) + val input = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/scholix/result.json")) + .mkString + val res = SparkResolveRelation.extractPidsFromRecord(input) assertNotNull(res) - assertEquals(1,res._2.size) + assertEquals(1, res._2.size) } @Test - def testOAFToSummary():Unit= { - val inputRelations = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/oaf_to_summary")).mkString + def testOAFToSummary(): Unit = { + val inputRelations = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/oaf_to_summary")) + .mkString val items = inputRelations.lines.toList assertNotNull(items) - items.foreach(i =>assertTrue(i.nonEmpty)) - val result = items.map(r => mapper.readValue(r, classOf[Result])).map(i => ScholixUtils.resultToSummary(i)) + items.foreach(i => assertTrue(i.nonEmpty)) + val result = + items.map(r => mapper.readValue(r, classOf[Result])).map(i => ScholixUtils.resultToSummary(i)) assertNotNull(result) assertEquals(result.size, items.size) - val d = result.find(s => s.getLocalIdentifier.asScala.exists(i => i.getUrl == null || i.getUrl.isEmpty)) + val d = result.find(s => + s.getLocalIdentifier.asScala.exists(i => i.getUrl == null || i.getUrl.isEmpty) + ) assertFalse(d.isDefined) println(mapper.writeValueAsString(result.head)) } - - @Test - def testScholixMergeOnSource():Unit = { - val inputRelations = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/merge_result_scholix")).mkString - val result:List[(Relation,ScholixSummary)] =inputRelations.lines.sliding(2).map(s => (s.head, s(1))).map(p => (mapper.readValue(p._1, classOf[Relation]),mapper.readValue(p._2, classOf[ScholixSummary]) )).toList + def testScholixMergeOnSource(): Unit = { + val inputRelations = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/merge_result_scholix") + ) + .mkString + val result: List[(Relation, ScholixSummary)] = inputRelations.lines + .sliding(2) + .map(s => (s.head, s(1))) + .map(p => + (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary])) + ) + .toList assertNotNull(result) assertTrue(result.nonEmpty) result.foreach(r => assertEquals(r._1.getSource, r._2.getId)) - val scholix:List[Scholix] = result.map(r => ScholixUtils.scholixFromSource(r._1, r._2)) + val scholix: List[Scholix] = result.map(r => ScholixUtils.scholixFromSource(r._1, r._2)) println(mapper.writeValueAsString(scholix.head)) } - - - @Test def testScholixRelationshipsClean(): Unit = { - val inputRelations = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relation_transform.json")).mkString + val inputRelations = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relation_transform.json") + ) + .mkString implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(inputRelations) - val l:List[String] =json.extract[List[String]] + val l: List[String] = json.extract[List[String]] assertNotNull(l) assertTrue(l.nonEmpty) - val relVocbaulary =ScholixUtils.relations - l.foreach(r => assertTrue(relVocbaulary.contains(r.toLowerCase))) + val relVocbaulary = ScholixUtils.relations + l.foreach(r => assertTrue(relVocbaulary.contains(r.toLowerCase))) } - - - } diff --git a/pom.xml b/pom.xml index ed7b8a2ca..b68671aec 100644 --- a/pom.xml +++ b/pom.xml @@ -620,6 +620,18 @@ + + org.antipathy + mvn-scalafmt_2.11 + 1.0.1640073709.733712b + + + eu.dnetlib.dhp + dhp-code-style + ${project.version} + + +
@@ -665,6 +677,33 @@ + + org.antipathy + mvn-scalafmt_2.11 + + dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf + false + false + + ${project.basedir}/src/main/scala + + + ${project.basedir}/src/test/scala + + false + false + : git rev-parse --abbrev-ref HEAD + false + + + + validate + + format + + + + org.apache.maven.plugins maven-release-plugin From 0f2144b5e011b76a23e9447433f494502943f243 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 11 Jan 2022 17:03:44 +0100 Subject: [PATCH 134/176] scalafmt: code formatting --- .../doiboost/crossref/Crossref2Oaf.scala | 4 +- .../mag/SparkImportMagIntoDataset.scala | 274 +++++++++++++++--- 2 files changed, 241 insertions(+), 37 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 60f4e88c8..7770ea249 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -438,8 +438,8 @@ case object Crossref2Oaf { funders.foreach(funder => { if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) { funder.DOI.get match { - case "10.13039/100010663" | "10.13039/100010661" | - "10.13039/501100007601" | "10.13039/501100000780" | "10.13039/100010665" => + case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | + "10.13039/501100000780" | "10.13039/100010665" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" => diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala index 039c935f3..316bd91ac 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala @@ -8,44 +8,245 @@ import org.apache.spark.sql.{SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkImportMagIntoDataset { + val datatypedict = Map( - "bool" -> BooleanType, - "int" -> IntegerType, - "uint" -> IntegerType, - "long" -> LongType, - "ulong" -> LongType, - "float" -> FloatType, - "string" -> StringType, + "bool" -> BooleanType, + "int" -> IntegerType, + "uint" -> IntegerType, + "long" -> LongType, + "ulong" -> LongType, + "float" -> FloatType, + "string" -> StringType, "DateTime" -> DateType ) - val stream = Map( - "Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Iso3166Code:string", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), - "AuthorExtendedAttributes" -> Tuple2("mag/AuthorExtendedAttributes.txt", Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string")), - "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), - "ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "EntityRelatedEntities" -> Tuple2("advanced/EntityRelatedEntities.txt", Seq("EntityId:long", "EntityType:string", "RelatedEntityId:long", "RelatedEntityType:string", "RelatedType:int", "Score:float")), - "FieldOfStudyChildren" -> Tuple2("advanced/FieldOfStudyChildren.txt", Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")), - "FieldOfStudyExtendedAttributes" -> Tuple2("advanced/FieldOfStudyExtendedAttributes.txt", Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string")), - "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "PaperAbstractsInvertedIndex" -> Tuple2("nlp/PaperAbstractsInvertedIndex.txt.*", Seq("PaperId:long", "IndexedAbstract:string")), - "PaperAuthorAffiliations" -> Tuple2("mag/PaperAuthorAffiliations.txt", Seq("PaperId:long", "AuthorId:long", "AffiliationId:long?", "AuthorSequenceNumber:uint", "OriginalAuthor:string", "OriginalAffiliation:string")), - "PaperCitationContexts" -> Tuple2("nlp/PaperCitationContexts.txt", Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")), - "PaperExtendedAttributes" -> Tuple2("mag/PaperExtendedAttributes.txt", Seq("PaperId:long", "AttributeType:int", "AttributeValue:string")), - "PaperFieldsOfStudy" -> Tuple2("advanced/PaperFieldsOfStudy.txt", Seq("PaperId:long", "FieldOfStudyId:long", "Score:float")), - "PaperMeSH" -> Tuple2("advanced/PaperMeSH.txt", Seq("PaperId:long", "DescriptorUI:string", "DescriptorName:string", "QualifierUI:string", "QualifierName:string", "IsMajorTopic:bool")), - "PaperRecommendations" -> Tuple2("advanced/PaperRecommendations.txt", Seq("PaperId:long", "RecommendedPaperId:long", "Score:float")), - "PaperReferences" -> Tuple2("mag/PaperReferences.txt", Seq("PaperId:long", "PaperReferenceId:long")), - "PaperResources" -> Tuple2("mag/PaperResources.txt", Seq("PaperId:long", "ResourceType:int", "ResourceUrl:string", "SourceUrl:string", "RelationshipType:int")), - "PaperUrls" -> Tuple2("mag/PaperUrls.txt", Seq("PaperId:long", "SourceType:int?", "SourceUrl:string", "LanguageCode:string")), - "Papers" -> Tuple2("mag/Papers.txt", Seq("PaperId:long", "Rank:uint", "Doi:string", "DocType:string", "PaperTitle:string", "OriginalTitle:string", "BookTitle:string", "Year:int?", "Date:DateTime?", "OnlineDate:DateTime?", "Publisher:string", "JournalId:long?", "ConferenceSeriesId:long?", "ConferenceInstanceId:long?", "Volume:string", "Issue:string", "FirstPage:string", "LastPage:string", "ReferenceCount:long", "CitationCount:long", "EstimatedCitation:long", "OriginalVenue:string", "FamilyId:long?", "FamilyRank:uint?", "DocSubTypes:string", "CreatedDate:DateTime")), - "RelatedFieldOfStudy" -> Tuple2("advanced/RelatedFieldOfStudy.txt", Seq("FieldOfStudyId1:long", "Type1:string", "FieldOfStudyId2:long", "Type2:string", "Rank:float")) + "Affiliations" -> Tuple2( + "mag/Affiliations.txt", + Seq( + "AffiliationId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "GridId:string", + "OfficialPage:string", + "WikiPage:string", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "Iso3166Code:string", + "Latitude:float?", + "Longitude:float?", + "CreatedDate:DateTime" + ) + ), + "AuthorExtendedAttributes" -> Tuple2( + "mag/AuthorExtendedAttributes.txt", + Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string") + ), + "Authors" -> Tuple2( + "mag/Authors.txt", + Seq( + "AuthorId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "LastKnownAffiliationId:long?", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "CreatedDate:DateTime" + ) + ), + "ConferenceInstances" -> Tuple2( + "mag/ConferenceInstances.txt", + Seq( + "ConferenceInstanceId:long", + "NormalizedName:string", + "DisplayName:string", + "ConferenceSeriesId:long", + "Location:string", + "OfficialUrl:string", + "StartDate:DateTime?", + "EndDate:DateTime?", + "AbstractRegistrationDate:DateTime?", + "SubmissionDeadlineDate:DateTime?", + "NotificationDueDate:DateTime?", + "FinalVersionDueDate:DateTime?", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "Latitude:float?", + "Longitude:float?", + "CreatedDate:DateTime" + ) + ), + "ConferenceSeries" -> Tuple2( + "mag/ConferenceSeries.txt", + Seq( + "ConferenceSeriesId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "CreatedDate:DateTime" + ) + ), + "EntityRelatedEntities" -> Tuple2( + "advanced/EntityRelatedEntities.txt", + Seq( + "EntityId:long", + "EntityType:string", + "RelatedEntityId:long", + "RelatedEntityType:string", + "RelatedType:int", + "Score:float" + ) + ), + "FieldOfStudyChildren" -> Tuple2( + "advanced/FieldOfStudyChildren.txt", + Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long") + ), + "FieldOfStudyExtendedAttributes" -> Tuple2( + "advanced/FieldOfStudyExtendedAttributes.txt", + Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string") + ), + "FieldsOfStudy" -> Tuple2( + "advanced/FieldsOfStudy.txt", + Seq( + "FieldOfStudyId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "MainType:string", + "Level:int", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "CreatedDate:DateTime" + ) + ), + "Journals" -> Tuple2( + "mag/Journals.txt", + Seq( + "JournalId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "Issn:string", + "Publisher:string", + "Webpage:string", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "CreatedDate:DateTime" + ) + ), + "PaperAbstractsInvertedIndex" -> Tuple2( + "nlp/PaperAbstractsInvertedIndex.txt.*", + Seq("PaperId:long", "IndexedAbstract:string") + ), + "PaperAuthorAffiliations" -> Tuple2( + "mag/PaperAuthorAffiliations.txt", + Seq( + "PaperId:long", + "AuthorId:long", + "AffiliationId:long?", + "AuthorSequenceNumber:uint", + "OriginalAuthor:string", + "OriginalAffiliation:string" + ) + ), + "PaperCitationContexts" -> Tuple2( + "nlp/PaperCitationContexts.txt", + Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string") + ), + "PaperExtendedAttributes" -> Tuple2( + "mag/PaperExtendedAttributes.txt", + Seq("PaperId:long", "AttributeType:int", "AttributeValue:string") + ), + "PaperFieldsOfStudy" -> Tuple2( + "advanced/PaperFieldsOfStudy.txt", + Seq("PaperId:long", "FieldOfStudyId:long", "Score:float") + ), + "PaperMeSH" -> Tuple2( + "advanced/PaperMeSH.txt", + Seq( + "PaperId:long", + "DescriptorUI:string", + "DescriptorName:string", + "QualifierUI:string", + "QualifierName:string", + "IsMajorTopic:bool" + ) + ), + "PaperRecommendations" -> Tuple2( + "advanced/PaperRecommendations.txt", + Seq("PaperId:long", "RecommendedPaperId:long", "Score:float") + ), + "PaperReferences" -> Tuple2( + "mag/PaperReferences.txt", + Seq("PaperId:long", "PaperReferenceId:long") + ), + "PaperResources" -> Tuple2( + "mag/PaperResources.txt", + Seq( + "PaperId:long", + "ResourceType:int", + "ResourceUrl:string", + "SourceUrl:string", + "RelationshipType:int" + ) + ), + "PaperUrls" -> Tuple2( + "mag/PaperUrls.txt", + Seq("PaperId:long", "SourceType:int?", "SourceUrl:string", "LanguageCode:string") + ), + "Papers" -> Tuple2( + "mag/Papers.txt", + Seq( + "PaperId:long", + "Rank:uint", + "Doi:string", + "DocType:string", + "PaperTitle:string", + "OriginalTitle:string", + "BookTitle:string", + "Year:int?", + "Date:DateTime?", + "OnlineDate:DateTime?", + "Publisher:string", + "JournalId:long?", + "ConferenceSeriesId:long?", + "ConferenceInstanceId:long?", + "Volume:string", + "Issue:string", + "FirstPage:string", + "LastPage:string", + "ReferenceCount:long", + "CitationCount:long", + "EstimatedCitation:long", + "OriginalVenue:string", + "FamilyId:long?", + "FamilyRank:uint?", + "DocSubTypes:string", + "CreatedDate:DateTime" + ) + ), + "RelatedFieldOfStudy" -> Tuple2( + "advanced/RelatedFieldOfStudy.txt", + Seq( + "FieldOfStudyId1:long", + "Type1:string", + "FieldOfStudyId2:long", + "Type2:string", + "Rank:float" + ) + ) ) - def getSchema(streamName: String): StructType = { var schema = new StructType() val d: Seq[String] = stream(streamName)._2 @@ -61,19 +262,22 @@ object SparkImportMagIntoDataset { schema } - def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/mag/convert_mag_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/mag/convert_mag_to_oaf_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() stream.foreach { case (k, v) => val s: StructType = getSchema(k) From b78d2b71f04b0709b3ac1d781c6778101d7ce799 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 12 Jan 2022 09:38:34 +0100 Subject: [PATCH 135/176] updated scala format configuration --- .../src/main/resources/scalafmt/scalafmt.conf | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf b/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf index 00c866f28..0b5dbe0b4 100644 --- a/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf +++ b/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf @@ -7,9 +7,15 @@ continuationIndent.callSite = 2 continuationIndent.defnSite = 2 danglingParentheses = true indentOperator = spray -maxColumn = 100 +maxColumn = 120 newlines.alwaysBeforeTopLevelStatements = true -#project.excludeFilters = ["\.*\.sbt"] -rewrite.rules = [RedundantParens, SortImports] +project.excludeFilters = [".*\\.sbt"] +rewrite.rules = [AvoidInfix] +rewrite.rules = [ExpandImportSelectors] +rewrite.rules = [RedundantBraces] +rewrite.rules = [RedundantParens] +rewrite.rules = [SortImports] +rewrite.rules = [SortModifiers] +rewrite.rules = [PreferCurlyFors] spaces.inImportCurlyBraces = false unindentTopLevelOperators = true \ No newline at end of file From 57e2c4b74940a6269b389bbe57f0e9d90a944e89 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 12 Jan 2022 09:40:28 +0100 Subject: [PATCH 136/176] formatted code --- .../dhp/datacite/DataciteAPIImporter.scala | 3 +-- .../DataciteToOAFTransformation.scala | 8 ++----- .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 11 ++------- .../doiboost/SparkGenerateDoiBoost.scala | 3 +-- .../doiboost/crossref/Crossref2Oaf.scala | 10 ++++---- .../doiboost/mag/SparkProcessMAG.scala | 4 +--- .../crossref/CrossrefMappingTest.scala | 24 +++++-------------- .../sx/graph/SparkConvertRDDtoDataset.scala | 9 +------ .../dhp/sx/graph/pangaea/PangaeaUtils.scala | 3 +-- .../oa/graph/hostedbymap/TestPrepare.scala | 6 +---- .../oa/graph/hostedbymap/TestPreprocess.scala | 3 +-- .../resolution/ResolveEntitiesTest.scala | 24 ++++--------------- .../sx/graph/scholix/ScholixGraphTest.scala | 8 ++----- 13 files changed, 27 insertions(+), 89 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala index 55823abd9..d2fd709aa 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala @@ -3,8 +3,7 @@ package eu.dnetlib.dhp.datacite import org.json4s.jackson.JsonMethods.{compact, parse, render} import org.json4s.{DefaultFormats, JValue} -class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until: Long = -1) - extends AbstractRestClient { +class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until: Long = -1) extends AbstractRestClient { override def extractInfo(input: String): Unit = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index 13866d0d0..a0b7cd95e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -327,9 +327,7 @@ object DataciteToOAFTransformation { a.setFullname(c.name.orNull) a.setName(c.givenName.orNull) a.setSurname(c.familyName.orNull) - if ( - c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null - ) { + if (c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null) { a.setPid( c.nameIdentifiers.get .map(ni => { @@ -395,9 +393,7 @@ object DataciteToOAFTransformation { .find(d => d.dateType.get.equalsIgnoreCase("issued")) .map(d => extract_date(d.date.get)) val a_date: Option[String] = dates - .filter(d => - d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available") - ) + .filter(d => d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available")) .map(d => extract_date(d.date.get)) .find(d => d != null && d.isDefined) .map(d => d.get) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index 857609368..92ad22c57 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -2,12 +2,7 @@ package eu.dnetlib.dhp.sx.bio.pubmed import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.{ - GraphCleaningFunctions, - IdentifierFactory, - OafMapperUtils, - PidType -} +import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf._ import collection.JavaConverters._ @@ -169,9 +164,7 @@ object PubMedToOaf { pubmedInstance.setInstancetype(cojbCategory) } else { val i_type = article.getPublicationTypes.asScala - .map(s => - getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue) - ) + .map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)) .find(q => q != null) if (i_type.isDefined) pubmedInstance.setInstancetype(i_type.get) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index 239082d45..9323c994c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -59,8 +59,7 @@ object SparkGenerateDoiBoost { val workingDirPath = parser.get("workingPath") val openaireOrganizationPath = parser.get("openaireOrganizationPath") - val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] - with Serializable { + val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable { override def zero: Publication = new Publication override def reduce(b: Publication, a: (String, Publication)): Publication = { diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 7770ea249..0cb08ea94 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -438,11 +438,10 @@ case object Crossref2Oaf { funders.foreach(funder => { if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) { funder.DOI.get match { - case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | - "10.13039/501100000780" | "10.13039/100010665" => + case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" | + "10.13039/100010665" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | - "10.13039/501100000780" => + case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward) case "10.13039/501100000781" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward) @@ -512,8 +511,7 @@ case object Crossref2Oaf { case "European Union's" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) generateSimpleRelationFromAward(funder, "corda_______", extractECAward) - case "The French National Research Agency (ANR)" | - "The French National Research Agency" => + case "The French National Research Agency (ANR)" | "The French National Research Agency" => generateSimpleRelationFromAward(funder, "anr_________", a => a) case "CONICYT, Programa de Formación de Capital Humano Avanzado" => generateSimpleRelationFromAward(funder, "conicytf____", extractECAward) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index 5c960c15b..eae669853 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -15,9 +15,7 @@ object SparkProcessMAG { def getDistinctResults(d: Dataset[MagPapers]): Dataset[MagPapers] = { d.where(col("Doi").isNotNull) .groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING) - .reduceGroups((p1: MagPapers, p2: MagPapers) => - ConversionUtil.choiceLatestMagArtitcle(p1, p2) - ) + .reduceGroups((p1: MagPapers, p2: MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1, p2)) .map(_._2)(Encoders.product[MagPapers]) .map(mp => { MagPapers( diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index ad3d4b3d4..8124a5aae 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -223,9 +223,7 @@ class CrossrefMappingTest { val collectedFromList = result.getCollectedfrom.asScala assert( - collectedFromList.exists(c => - c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2") - ), + collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion" ) @@ -301,9 +299,7 @@ class CrossrefMappingTest { val collectedFromList = result.getCollectedfrom.asScala assert( - collectedFromList.exists(c => - c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2") - ), + collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion" ) @@ -435,9 +431,7 @@ class CrossrefMappingTest { val collectedFromList = result.getCollectedfrom.asScala assert( - collectedFromList.exists(c => - c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2") - ), + collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion" ) @@ -553,9 +547,7 @@ class CrossrefMappingTest { println(mapper.writeValueAsString(item)) assertTrue( - item.getInstance().asScala exists (i => - i.getLicense.getValue.equals("https://www.springer.com/vor") - ) + item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://www.springer.com/vor")) ) assertTrue( item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("CLOSED")) @@ -590,9 +582,7 @@ class CrossrefMappingTest { ) assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN"))) assertTrue( - item.getInstance().asScala exists (i => - i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid - ) + item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid) ) mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) println(mapper.writeValueAsString(item)) @@ -627,9 +617,7 @@ class CrossrefMappingTest { ) assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN"))) assertTrue( - item.getInstance().asScala exists (i => - i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid - ) + item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid) ) mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) println(mapper.writeValueAsString(item)) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 1bfb9a9ac..f13c14da5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -2,14 +2,7 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{ - OtherResearchProduct, - Publication, - Relation, - Result, - Software, - Dataset => OafDataset -} +import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala index 00d0d66c4..23f4da6c7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala @@ -90,8 +90,7 @@ object PangaeaUtils { ) } - def getDatasetAggregator() - : Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] = + def getDatasetAggregator(): Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] = new Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] { override def zero: PangaeaDataModel = null diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala index 23b42009e..5fc29e3b0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -1,11 +1,7 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{ - joinResHBM, - prepareResultInfo, - toEntityInfo -} +import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo} import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index cc7131119..12879c466 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -51,8 +51,7 @@ class TestPreprocess extends java.io.Serializable { ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1 ) assertTrue( - ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")) - .count == 1 + ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")).count == 1 ) assertTrue( ds.filter(hbi => diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index 3cdc370d6..c8e41743f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -189,11 +189,7 @@ class ResolveEntitiesTest extends Serializable { var ct = pubDS.count() var et = pubDS - .filter(p => - p.getTitle != null && p.getTitle.asScala.forall(t => - t.getValue != null && t.getValue.nonEmpty - ) - ) + .filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)) .count() assertEquals(ct, et) @@ -208,11 +204,7 @@ class ResolveEntitiesTest extends Serializable { .count() ct = datDS.count() et = datDS - .filter(p => - p.getTitle != null && p.getTitle.asScala.forall(t => - t.getValue != null && t.getValue.nonEmpty - ) - ) + .filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)) .count() assertEquals(ct, et) @@ -226,11 +218,7 @@ class ResolveEntitiesTest extends Serializable { .count() ct = softDS.count() et = softDS - .filter(p => - p.getTitle != null && p.getTitle.asScala.forall(t => - t.getValue != null && t.getValue.nonEmpty - ) - ) + .filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)) .count() assertEquals(ct, et) @@ -245,11 +233,7 @@ class ResolveEntitiesTest extends Serializable { ct = orpDS.count() et = orpDS - .filter(p => - p.getTitle != null && p.getTitle.asScala.forall(t => - t.getValue != null && t.getValue.nonEmpty - ) - ) + .filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)) .count() assertEquals(ct, et) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala index 6dc945d4b..e92f36896 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala @@ -56,9 +56,7 @@ class ScholixGraphTest extends AbstractVocabularyTest { assertNotNull(result) assertEquals(result.size, items.size) - val d = result.find(s => - s.getLocalIdentifier.asScala.exists(i => i.getUrl == null || i.getUrl.isEmpty) - ) + val d = result.find(s => s.getLocalIdentifier.asScala.exists(i => i.getUrl == null || i.getUrl.isEmpty)) assertFalse(d.isDefined) println(mapper.writeValueAsString(result.head)) @@ -74,9 +72,7 @@ class ScholixGraphTest extends AbstractVocabularyTest { val result: List[(Relation, ScholixSummary)] = inputRelations.lines .sliding(2) .map(s => (s.head, s(1))) - .map(p => - (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary])) - ) + .map(p => (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary]))) .toList assertNotNull(result) assertTrue(result.nonEmpty) From 4993666d73786a2ca420cbd5658732c0768203de Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 12 Jan 2022 16:53:47 +0100 Subject: [PATCH 137/176] [BipFinderInstanceLevel] changed creation of the instance to allow to enrich existing instances with same pid --- .../main/java/eu/dnetlib/dhp/actionmanager/Constants.java | 1 + .../createunresolvedentities/PrepareBipFinder.java | 8 ++++++++ pom.xml | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java index 3a46228d8..b790d90cb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java @@ -18,6 +18,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class Constants { public static final String DOI = "doi"; + public static final String DOI_CLASSNAME = "Digital Object Identifier"; public static final String DEFAULT_DELIMITER = ","; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index e9c9f0350..30cfce903 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -11,6 +11,7 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -95,11 +96,18 @@ public class PrepareBipFinder implements Serializable { }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)) .map((MapFunction) v -> { Result r = new Result(); + final String cleanedPid = CleaningFunctions.normalizePidValue(DOI, v.getId()); r.setId(DHPUtils.generateUnresolvedIdentifier(v.getId(), DOI)); Instance inst = new Instance(); inst.setMeasures(getMeasure(v)); + + inst.setPid(Arrays.asList(OafMapperUtils.structuredProperty(cleanedPid, + OafMapperUtils.qualifier(DOI, DOI_CLASSNAME, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES), null))); r.setInstance(Arrays.asList(inst)); + r.setDataInfo(OafMapperUtils.dataInfo(false,null,null,false, OafMapperUtils.qualifier(ModelConstants.PROVENANCE_ENRICH, null,ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS))); return r; }, Encoders.bean(Result.class)) .write() diff --git a/pom.xml b/pom.xml index b68671aec..34733cee4 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.9.24] + [2.9.25-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From dbd6fa1d65614d307d2e11b7d157ec05eeb8bae3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 12 Jan 2022 17:19:38 +0100 Subject: [PATCH 138/176] scalafmt: remote referencing the common definition files makes it work compiling the entire project as well as the individual submodules --- .gitignore | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 0b794ef74..73d9179fa 100644 --- a/.gitignore +++ b/.gitignore @@ -25,4 +25,4 @@ spark-warehouse /**/job-override.properties /**/*.log /**/.factorypath - +/**/.scalafmt.conf diff --git a/pom.xml b/pom.xml index b68671aec..746f4e773 100644 --- a/pom.xml +++ b/pom.xml @@ -681,7 +681,7 @@ org.antipathy mvn-scalafmt_2.11 - dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf + https://code-repo.d4science.org/D-Net/dnet-hadoop/raw/branch/beta/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf false false From e7d5a39c035d9d273f562dc5151305f0d759c540 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 12 Jan 2022 17:25:04 +0100 Subject: [PATCH 139/176] [BipFinderInstanceLevel] added tests in test class --- .../PrepareBipFinder.java | 33 +++++++++++++--- .../createunresolvedentities/ProduceTest.java | 39 +++++++++++++++++++ 2 files changed, 66 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index 30cfce903..80573c71a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -11,7 +11,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -34,6 +33,7 @@ import eu.dnetlib.dhp.schema.oaf.Instance; import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Measure; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; @@ -102,12 +102,33 @@ public class PrepareBipFinder implements Serializable { Instance inst = new Instance(); inst.setMeasures(getMeasure(v)); - inst.setPid(Arrays.asList(OafMapperUtils.structuredProperty(cleanedPid, - OafMapperUtils.qualifier(DOI, DOI_CLASSNAME, - ModelConstants.DNET_PID_TYPES, - ModelConstants.DNET_PID_TYPES), null))); + inst + .setPid( + Arrays + .asList( + OafMapperUtils + .structuredProperty( + cleanedPid, + OafMapperUtils + .qualifier( + DOI, DOI_CLASSNAME, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES), + null))); r.setInstance(Arrays.asList(inst)); - r.setDataInfo(OafMapperUtils.dataInfo(false,null,null,false, OafMapperUtils.qualifier(ModelConstants.PROVENANCE_ENRICH, null,ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS))); + r + .setDataInfo( + OafMapperUtils + .dataInfo( + false, null, true, + false, + OafMapperUtils + .qualifier( + ModelConstants.PROVENANCE_ENRICH, + null, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + null)); return r; }, Encoders.bean(Result.class)) .write() diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index 02c6582f1..8c20475ff 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -245,6 +245,45 @@ public class ProduceTest { .get(0) .getValue()); + Assertions.assertEquals("10.3390/s18072310", + tmp.filter(row -> row.getId().equals(doi)).collect() + .get(0) + .getInstance().get(0) + .getPid().get(0) + .getValue().toLowerCase()); + + Assertions.assertEquals("doi", + tmp.filter(row -> row.getId().equals(doi)).collect() + .get(0) + .getInstance().get(0) + .getPid().get(0) + .getQualifier().getClassid()); + + Assertions.assertEquals("Digital Object Identifier", + tmp.filter(row -> row.getId().equals(doi)).collect() + .get(0) + .getInstance().get(0) + .getPid().get(0) + .getQualifier().getClassname()); + + } + + @Test + void produceTestMeasures() throws Exception { + final String doi = "unresolved::10.3390/s18072310::doi"; + JavaRDD tmp = getResultJavaRDD(); + + List mes = tmp + .filter(row -> row.getInstance() != null && row.getInstance().size() > 0) + .flatMap(row -> row.getInstance().iterator()) + .flatMap(i -> i.getPid().iterator()) + .collect(); + + Assertions.assertEquals(86, mes.size()); + + tmp.filter(row -> row.getInstance() != null && row.getInstance().size() > 0) + .foreach(e -> Assertions.assertEquals("sysimport:enrich", e.getDataInfo().getProvenanceaction().getClassid())); + } @Test From a75fb8c47ae0cd44398823a20ea2a24e1985d2ab Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 12 Jan 2022 18:06:26 +0100 Subject: [PATCH 140/176] [BipFinderInstanceLevel] change pom to align to the dhp-schema release 2.10.24 and refactoring --- .../createunresolvedentities/ProduceTest.java | 71 ++++++++++++------- pom.xml | 2 +- 2 files changed, 48 insertions(+), 25 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index 8c20475ff..ce44f0036 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -245,26 +245,47 @@ public class ProduceTest { .get(0) .getValue()); - Assertions.assertEquals("10.3390/s18072310", - tmp.filter(row -> row.getId().equals(doi)).collect() - .get(0) - .getInstance().get(0) - .getPid().get(0) - .getValue().toLowerCase()); + Assertions + .assertEquals( + "10.3390/s18072310", + tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getInstance() + .get(0) + .getPid() + .get(0) + .getValue() + .toLowerCase()); - Assertions.assertEquals("doi", - tmp.filter(row -> row.getId().equals(doi)).collect() - .get(0) - .getInstance().get(0) - .getPid().get(0) - .getQualifier().getClassid()); + Assertions + .assertEquals( + "doi", + tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getInstance() + .get(0) + .getPid() + .get(0) + .getQualifier() + .getClassid()); - Assertions.assertEquals("Digital Object Identifier", - tmp.filter(row -> row.getId().equals(doi)).collect() - .get(0) - .getInstance().get(0) - .getPid().get(0) - .getQualifier().getClassname()); + Assertions + .assertEquals( + "Digital Object Identifier", + tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getInstance() + .get(0) + .getPid() + .get(0) + .getQualifier() + .getClassname()); } @@ -274,15 +295,17 @@ public class ProduceTest { JavaRDD tmp = getResultJavaRDD(); List mes = tmp - .filter(row -> row.getInstance() != null && row.getInstance().size() > 0) - .flatMap(row -> row.getInstance().iterator()) - .flatMap(i -> i.getPid().iterator()) - .collect(); + .filter(row -> row.getInstance() != null && row.getInstance().size() > 0) + .flatMap(row -> row.getInstance().iterator()) + .flatMap(i -> i.getPid().iterator()) + .collect(); Assertions.assertEquals(86, mes.size()); - tmp.filter(row -> row.getInstance() != null && row.getInstance().size() > 0) - .foreach(e -> Assertions.assertEquals("sysimport:enrich", e.getDataInfo().getProvenanceaction().getClassid())); + tmp + .filter(row -> row.getInstance() != null && row.getInstance().size() > 0) + .foreach( + e -> Assertions.assertEquals("sysimport:enrich", e.getDataInfo().getProvenanceaction().getClassid())); } diff --git a/pom.xml b/pom.xml index a27f89f9f..7a026e668 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.9.25-SNAPSHOT] + [2.10.24] [4.0.3] [6.0.5] [3.1.6] From a7c4d0d16d6de8af220cfa953eaecd1235da19b1 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 13 Jan 2022 13:52:00 +0100 Subject: [PATCH 141/176] [DoiBoost Organizations] added parameter to specify the action in the wf raw_organizations to be able to load the openorgs organization as in the loading step for the construction of the graph --- .../oa/graph/raw_organizations/oozie_app/workflow.xml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_organizations/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_organizations/oozie_app/workflow.xml index fb6e02555..70ecef154 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_organizations/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_organizations/oozie_app/workflow.xml @@ -43,13 +43,18 @@ beta the database schema according to the D-Net infrastructure (beta or production) + + openOrgsAction + openorgs_dedup + The action to be executed during the import for OpenOrgs + isLookupUrl the address of the lookUp service nsPrefixBlacklist - + foo a blacklist of nsprefixes (comma separeted) @@ -156,7 +161,7 @@ --postgresUser${postgresOpenOrgsUser} --postgresPassword${postgresOpenOrgsPassword} --isLookupUrl${isLookupUrl} - --actionopenorgs_dedup + --action${openOrgsAction} --dbschema${dbSchema} --nsPrefixBlacklist${nsPrefixBlacklist} From 42e8f76778c23b1167f4ae62486dc19ea0ac404b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 13 Jan 2022 16:06:43 +0100 Subject: [PATCH 142/176] [GraphCleaning] change the return value in the filtering function to avoid to lose the APC entities --- .../eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 37f5573d0..351bd2dd5 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -101,7 +101,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { .orElse(true)) .orElse(true)) .orElse(true))) { - return false; + return true; } if (value instanceof Datasource) { From 44a937f4ed4bc32211a7a9f6c7218f98fe1488a9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 19 Jan 2022 12:24:52 +0100 Subject: [PATCH 143/176] factored out entity grouping implementation, extended to consider results from delegated authorities rather than identical records from other sources --- .../oa/merge}/DispatchEntitiesSparkJob.java | 22 +- .../dhp/oa/merge}/GroupEntitiesSparkJob.java | 46 ++- .../dhp/schema/oaf/utils/OafMapperUtils.java | 18 ++ .../dedup/consistency/oozie_app/workflow.xml | 16 +- .../group/dispatch_entities_parameters.json | 26 ++ .../group_graph_entities_parameters.json | 20 ++ .../graph/group/oozie_app/config-default.xml | 18 ++ .../dhp/oa/graph/group/oozie_app/workflow.xml | 289 ++++++++++++++++++ pom.xml | 2 +- 9 files changed, 412 insertions(+), 45 deletions(-) rename {dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup => dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge}/DispatchEntitiesSparkJob.java (96%) rename {dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup => dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge}/GroupEntitiesSparkJob.java (98%) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java similarity index 96% rename from dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java index ea738836b..caa9e8106 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java @@ -1,11 +1,11 @@ -package eu.dnetlib.dhp.oa.dedup; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.util.Objects; -import java.util.Optional; +package eu.dnetlib.dhp.oa.merge; +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -17,12 +17,10 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; +import java.util.Objects; +import java.util.Optional; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.OafEntity; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; public class DispatchEntitiesSparkJob { @@ -38,7 +36,7 @@ public class DispatchEntitiesSparkJob { .requireNonNull( DispatchEntitiesSparkJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json"))); + "/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java similarity index 98% rename from dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java index a19f86380..771eba873 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java @@ -1,15 +1,17 @@ -package eu.dnetlib.dhp.oa.dedup; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import static eu.dnetlib.dhp.utils.DHPUtils.toSeq; - -import java.io.IOException; -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.stream.Collectors; +package eu.dnetlib.dhp.oa.merge; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.jayway.jsonpath.Configuration; +import com.jayway.jsonpath.DocumentContext; +import com.jayway.jsonpath.JsonPath; +import com.jayway.jsonpath.Option; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -20,21 +22,17 @@ import org.apache.spark.sql.*; import org.apache.spark.sql.expressions.Aggregator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.jayway.jsonpath.Configuration; -import com.jayway.jsonpath.DocumentContext; -import com.jayway.jsonpath.JsonPath; -import com.jayway.jsonpath.Option; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; +import java.io.IOException; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.utils.DHPUtils.toSeq; + /** * Groups the graph content by entity identifier to ensure ID uniqueness */ @@ -53,7 +51,7 @@ public class GroupEntitiesSparkJob { .toString( GroupEntitiesSparkJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index 720fe47fb..bbdd59975 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -47,6 +47,17 @@ public class OafMapperUtils { } public static Result mergeResults(Result left, Result right) { + + final boolean leftFromDeletedAuthority = isFromDeletedAuthority(left); + final boolean rightFromDeletedAuthority = isFromDeletedAuthority(right); + + if (leftFromDeletedAuthority && !rightFromDeletedAuthority) { + return left; + } + if (!leftFromDeletedAuthority && rightFromDeletedAuthority) { + return right; + } + if (new ResultTypeComparator().compare(left, right) < 0) { left.mergeFrom(right); return left; @@ -56,6 +67,13 @@ public class OafMapperUtils { } } + private static boolean isFromDeletedAuthority(Result r) { + return r.getInstance() + .stream() + .map(i -> i.getCollectedfrom().getKey()) + .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId)); + } + public static KeyValue keyValue(final String k, final String v) { final KeyValue kv = new KeyValue(); kv.setKey(k); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml index 4ea003926..7c500493f 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml @@ -104,7 +104,7 @@ yarn cluster group graph entities - eu.dnetlib.dhp.oa.dedup.GroupEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -138,7 +138,7 @@ yarn cluster Dispatch publications - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -163,7 +163,7 @@ yarn cluster Dispatch project - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -188,7 +188,7 @@ yarn cluster Dispatch organization - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -213,7 +213,7 @@ yarn cluster Dispatch publication - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -238,7 +238,7 @@ yarn cluster Dispatch dataset - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -263,7 +263,7 @@ yarn cluster Dispatch software - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -288,7 +288,7 @@ yarn cluster Dispatch otherresearchproduct - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json new file mode 100644 index 000000000..aa8d2a7c2 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "i", + "paramLongName": "inputPath", + "paramDescription": "the source path", + "paramRequired": true + }, + { + "paramName": "o", + "paramLongName": "outputPath", + "paramDescription": "path of the output graph", + "paramRequired": true + }, + { + "paramName": "c", + "paramLongName": "graphTableClassName", + "paramDescription": "the graph entity class name", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json new file mode 100644 index 000000000..e65acb3c4 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "gin", + "paramLongName": "graphInputPath", + "paramDescription": "the graph root path", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the output merged graph root path", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml new file mode 100644 index 000000000..2e0ed9aee --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml @@ -0,0 +1,18 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml new file mode 100644 index 000000000..883d0e1fb --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml @@ -0,0 +1,289 @@ + + + + graphBasePath + the input graph base path + + + workingPath + path of the working directory + + + graphOutputPath + path of the output graph + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + group graph entities + eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=15000 + + --graphInputPath${graphBasePath} + --outputPath${workingPath}/grouped_entities + + + + + + + + + + + + + + + + + + yarn + cluster + Dispatch publications + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/datasource + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Datasource + + + + + + + + yarn + cluster + Dispatch project + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/project + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Project + + + + + + + + yarn + cluster + Dispatch organization + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/organization + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Organization + + + + + + + + yarn + cluster + Dispatch publication + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/publication + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication + + + + + + + + yarn + cluster + Dispatch dataset + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/dataset + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset + + + + + + + + yarn + cluster + Dispatch software + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/software + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software + + + + + + + + yarn + cluster + Dispatch otherresearchproduct + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/otherresearchproduct + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + + + + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 7a026e668..86c3b4526 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.24] + [2.10.26-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From 62f135262e6d811c3e1c5559ef5370a376a87454 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 19 Jan 2022 12:30:52 +0100 Subject: [PATCH 144/176] code formatting --- .../oa/merge/DispatchEntitiesSparkJob.java | 18 ++++---- .../dhp/oa/merge/GroupEntitiesSparkJob.java | 42 ++++++++++--------- .../dhp/schema/oaf/utils/OafMapperUtils.java | 9 ++-- 3 files changed, 37 insertions(+), 32 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java index caa9e8106..d2bc8f45d 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java @@ -1,11 +1,11 @@ package eu.dnetlib.dhp.oa.merge; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.OafEntity; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.util.Objects; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -17,10 +17,12 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Objects; -import java.util.Optional; +import com.fasterxml.jackson.databind.ObjectMapper; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; public class DispatchEntitiesSparkJob { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java index 771eba873..91cb543ab 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java @@ -1,17 +1,15 @@ package eu.dnetlib.dhp.oa.merge; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.jayway.jsonpath.Configuration; -import com.jayway.jsonpath.DocumentContext; -import com.jayway.jsonpath.JsonPath; -import com.jayway.jsonpath.Option; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.utils.DHPUtils.toSeq; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -22,17 +20,21 @@ import org.apache.spark.sql.*; import org.apache.spark.sql.expressions.Aggregator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.jayway.jsonpath.Configuration; +import com.jayway.jsonpath.DocumentContext; +import com.jayway.jsonpath.JsonPath; +import com.jayway.jsonpath.Option; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; -import java.io.IOException; -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.stream.Collectors; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import static eu.dnetlib.dhp.utils.DHPUtils.toSeq; - /** * Groups the graph content by entity identifier to ensure ID uniqueness */ diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index bbdd59975..2f1fc3a58 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -68,10 +68,11 @@ public class OafMapperUtils { } private static boolean isFromDeletedAuthority(Result r) { - return r.getInstance() - .stream() - .map(i -> i.getCollectedfrom().getKey()) - .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId)); + return r + .getInstance() + .stream() + .map(i -> i.getCollectedfrom().getKey()) + .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId)); } public static KeyValue keyValue(final String k, final String v) { From 391aa1373bbc948a447ce2bdc026d5aacb9eb7ab Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 19 Jan 2022 17:13:21 +0100 Subject: [PATCH 145/176] added unit test --- .../oa/merge/DispatchEntitiesSparkJob.java | 2 +- .../dhp/oa/merge/GroupEntitiesSparkJob.java | 2 +- .../merge}/dispatch_entities_parameters.json | 0 .../group_graph_entities_parameters.json | 0 .../group/dispatch_entities_parameters.json | 26 ------ .../group_graph_entities_parameters.json | 20 ---- .../group/GroupEntitiesSparkJobTest.java | 91 +++++++++++++++++++ .../dhp/oa/graph/group/dataset/dataset.json | 3 + .../graph/group/publication/publication.json | 3 + 9 files changed, 99 insertions(+), 48 deletions(-) rename {dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup => dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge}/dispatch_entities_parameters.json (100%) rename {dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup => dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge}/group_graph_entities_parameters.json (100%) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java index d2bc8f45d..3f65d754f 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java @@ -38,7 +38,7 @@ public class DispatchEntitiesSparkJob { .requireNonNull( DispatchEntitiesSparkJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json"))); + "/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java index 91cb543ab..e652bd5b6 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java @@ -53,7 +53,7 @@ public class GroupEntitiesSparkJob { .toString( GroupEntitiesSparkJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json")); + "/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json similarity index 100% rename from dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json rename to dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json similarity index 100% rename from dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json rename to dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json deleted file mode 100644 index aa8d2a7c2..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "paramName": "issm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "when true will stop SparkSession after job execution", - "paramRequired": false - }, - { - "paramName": "i", - "paramLongName": "inputPath", - "paramDescription": "the source path", - "paramRequired": true - }, - { - "paramName": "o", - "paramLongName": "outputPath", - "paramDescription": "path of the output graph", - "paramRequired": true - }, - { - "paramName": "c", - "paramLongName": "graphTableClassName", - "paramDescription": "the graph entity class name", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json deleted file mode 100644 index e65acb3c4..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "paramName": "issm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "when true will stop SparkSession after job execution", - "paramRequired": false - }, - { - "paramName": "gin", - "paramLongName": "graphInputPath", - "paramDescription": "the graph root path", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the output merged graph root path", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java new file mode 100644 index 000000000..5214c1783 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java @@ -0,0 +1,91 @@ + +package eu.dnetlib.dhp.oa.graph.group; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Result; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.*; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +public class GroupEntitiesSparkJobTest { + + private static SparkSession spark; + + private Path workingDir; + //private Path inputDir; + private Path graphInputPath; + + private Path outputPath; + + @BeforeAll + public static void beforeAll() { + SparkConf conf = new SparkConf(); + conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName()); + conf.setMaster("local"); + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); + spark = SparkSession.builder().config(conf).getOrCreate(); + } + + @BeforeEach + public void beforeEach() throws IOException, URISyntaxException { + workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); + //inputDir = workingDir.resolve("input"); + graphInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); + outputPath = workingDir.resolve("output"); + } + + @AfterEach + public void afterEach() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + } + + @AfterAll + public static void afterAll() { + spark.stop(); + } + + @Test + void testGroupEntities() throws Exception { + GroupEntitiesSparkJob.main(new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-graphInputPath", + graphInputPath.toString(), + "-outputPath", + outputPath.toString() + }); + + ObjectMapper mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + Dataset output = spark + .read() + .textFile(outputPath.toString()) + .map((MapFunction) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING()) + .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); + + Assertions.assertEquals( + 1, + output + .filter((FilterFunction) r -> + "50|doi_________::09821844208a5cd6300b2bfb13bca1b9".equals(r.getId()) && + r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo")) ) + .count()); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json new file mode 100644 index 000000000..e30be47e9 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"Zenodo"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|doi_________::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"Zenodo"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json new file mode 100644 index 000000000..29ce76df3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|doi_________::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file From abfa9c60458ba7698c724e681b27db58fd7754cb Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 19 Jan 2022 17:17:11 +0100 Subject: [PATCH 146/176] code formatting --- .../group/GroupEntitiesSparkJobTest.java | 129 +++++++++--------- 1 file changed, 65 insertions(+), 64 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java index 5214c1783..729948f52 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java @@ -1,12 +1,12 @@ package eu.dnetlib.dhp.oa.graph.group; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Result; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -17,75 +17,76 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.*; -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Result; public class GroupEntitiesSparkJobTest { - private static SparkSession spark; + private static SparkSession spark; - private Path workingDir; - //private Path inputDir; - private Path graphInputPath; + private Path workingDir; + private Path graphInputPath; - private Path outputPath; + private Path outputPath; - @BeforeAll - public static void beforeAll() { - SparkConf conf = new SparkConf(); - conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName()); - conf.setMaster("local"); - conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - conf.registerKryoClasses(ModelSupport.getOafModelClasses()); - spark = SparkSession.builder().config(conf).getOrCreate(); - } + @BeforeAll + public static void beforeAll() { + SparkConf conf = new SparkConf(); + conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName()); + conf.setMaster("local"); + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); + spark = SparkSession.builder().config(conf).getOrCreate(); + } - @BeforeEach - public void beforeEach() throws IOException, URISyntaxException { - workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); - //inputDir = workingDir.resolve("input"); - graphInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); - outputPath = workingDir.resolve("output"); - } + @BeforeEach + public void beforeEach() throws IOException, URISyntaxException { + workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); + graphInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); + outputPath = workingDir.resolve("output"); + } - @AfterEach - public void afterEach() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - } + @AfterEach + public void afterEach() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + } - @AfterAll - public static void afterAll() { - spark.stop(); - } + @AfterAll + public static void afterAll() { + spark.stop(); + } - @Test - void testGroupEntities() throws Exception { - GroupEntitiesSparkJob.main(new String[] { - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-graphInputPath", - graphInputPath.toString(), - "-outputPath", - outputPath.toString() - }); + @Test + void testGroupEntities() throws Exception { + GroupEntitiesSparkJob.main(new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-graphInputPath", + graphInputPath.toString(), + "-outputPath", + outputPath.toString() + }); - ObjectMapper mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - Dataset output = spark - .read() - .textFile(outputPath.toString()) - .map((MapFunction) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING()) - .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); + ObjectMapper mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + Dataset output = spark + .read() + .textFile(outputPath.toString()) + .map((MapFunction) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING()) + .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); - Assertions.assertEquals( - 1, - output - .filter((FilterFunction) r -> - "50|doi_________::09821844208a5cd6300b2bfb13bca1b9".equals(r.getId()) && - r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo")) ) - .count()); - } + Assertions + .assertEquals( + 1, + output + .filter( + (FilterFunction) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9" + .equals(r.getId()) && + r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo"))) + .count()); + } } From 3b9020c1b7803d81ed4a9aaf9acd9a7b58f00b23 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 19 Jan 2022 18:15:55 +0100 Subject: [PATCH 147/176] added unit test for the DispatchEntitiesJob --- .../group/GroupEntitiesSparkJobTest.java | 104 +++++++++++++----- 1 file changed, 78 insertions(+), 26 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java index 729948f52..3bd1c13de 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.oa.graph.group; +import static org.junit.jupiter.api.Assertions.*; + import java.io.IOException; import java.net.URISyntaxException; import java.nio.file.Files; @@ -19,22 +21,34 @@ import org.junit.jupiter.api.*; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Lists; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob; import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.utils.DHPUtils; +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) public class GroupEntitiesSparkJobTest { private static SparkSession spark; - private Path workingDir; - private Path graphInputPath; + private static ObjectMapper mapper = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - private Path outputPath; + private static Path workingDir; + private Path dataInputPath; + + private Path groupEntityPath; + private Path dispatchEntityPath; @BeforeAll - public static void beforeAll() { + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); + SparkConf conf = new SparkConf(); conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName()); conf.setMaster("local"); @@ -45,48 +59,86 @@ public class GroupEntitiesSparkJobTest { @BeforeEach public void beforeEach() throws IOException, URISyntaxException { - workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); - graphInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); - outputPath = workingDir.resolve("output"); - } - - @AfterEach - public void afterEach() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); + dataInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); + groupEntityPath = workingDir.resolve("grouped_entity"); + dispatchEntityPath = workingDir.resolve("dispatched_entity"); } @AfterAll - public static void afterAll() { + public static void afterAll() throws IOException { spark.stop(); + FileUtils.deleteDirectory(workingDir.toFile()); } @Test + @Order(1) void testGroupEntities() throws Exception { GroupEntitiesSparkJob.main(new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-graphInputPath", - graphInputPath.toString(), + dataInputPath.toString(), "-outputPath", - outputPath.toString() + groupEntityPath.toString() }); - ObjectMapper mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); Dataset output = spark .read() - .textFile(outputPath.toString()) + .textFile(groupEntityPath.toString()) .map((MapFunction) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING()) .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); - Assertions - .assertEquals( - 1, - output - .filter( - (FilterFunction) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9" - .equals(r.getId()) && - r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo"))) - .count()); + assertEquals( + 1, + output + .filter( + (FilterFunction) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9" + .equals(r.getId()) && + r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo"))) + .count()); + } + + @Test + @Order(2) + void testDispatchEntities() throws Exception { + for (String type : Lists + .newArrayList( + Publication.class.getCanonicalName(), eu.dnetlib.dhp.schema.oaf.Dataset.class.getCanonicalName())) { + String directory = StringUtils.substringAfterLast(type, ".").toLowerCase(); + DispatchEntitiesSparkJob.main(new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-inputPath", + groupEntityPath.toString(), + "-outputPath", + dispatchEntityPath.resolve(directory).toString(), + "-graphTableClassName", + type + }); + } + + Dataset output = spark + .read() + .textFile( + DHPUtils + .toSeq( + HdfsSupport + .listFiles(dispatchEntityPath.toString(), spark.sparkContext().hadoopConfiguration()))) + .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); + + assertEquals(3, output.count()); + assertEquals( + 2, + output + .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) + .filter((FilterFunction) s -> s.equals("publication")) + .count()); + assertEquals( + 1, + output + .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) + .filter((FilterFunction) s -> s.equals("dataset")) + .count()); } } From b37bc277c4ff38a42da04b56cd15562f3c8a233a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 21 Jan 2022 09:15:13 +0100 Subject: [PATCH 148/176] reintroduced the hostedby patching to the datacite records --- .../eu/dnetlib/dhp/datacite/hostedBy_map.json | 1021 +---------------- .../DataciteToOAFTransformation.scala | 31 +- 2 files changed, 33 insertions(+), 1019 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json index ddc70bc53..5aa50d6a8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json @@ -1,1032 +1,27 @@ { - "SND.QOG": { - "openaire_id": "re3data_____::r3d100012231", - "datacite_name": "Quality of Government Institute", - "official_name": "Quality of Government Institute's Data", - "similarity": 0.8985507246376812 - }, - "GESIS.CESSDA": { - "openaire_id": "re3data_____::r3d100010202", - "datacite_name": "CESSDA ERIC", - "official_name": "CESSDA ERIC" - }, - "BL.CRAN": { - "openaire_id": "re3data_____::r3d100012068", - "datacite_name": "Cranfield University", - "official_name": "Cranfield Online Research Data" - }, - "SUL.OPENNEURO": { - "openaire_id": "re3data_____::r3d100010924", - "datacite_name": "OpenNeuro", - "official_name": "OpenNeuro" - }, - "UNAVCO.UNAVCO": { - "openaire_id": "re3data_____::r3d100010872", - "datacite_name": "UNAVCO", - "official_name": "UNAVCO" - }, - "SUL.SDR": { - "openaire_id": "re3data_____::r3d100010710", - "datacite_name": "Stanford Digital Repository", - "official_name": "Stanford Digital Repository" - }, - "DK.ICES": { - "openaire_id": "re3data_____::r3d100011288", - "datacite_name": "International Council for the Exploration of the Sea (ICES)", - "official_name": "International Council for the Exploration of the Sea datasets", - "similarity": 0.8833333333333333 - }, - "CISTI.DFOSCIMR": { - "openaire_id": "re3data_____::r3d100012039", - "datacite_name": "Bedford Institute of Oceanography - Fisheries and Oceans Canada - Ocean Data and Information Section", - "official_name": "Bedford Institute of Oceanography - Oceanographic Databases" - }, - "CSIC.DIGITAL": { - "openaire_id": "re3data_____::r3d100011076", - "datacite_name": "Digital CSIC", - "official_name": "DIGITAL.CSIC" + "CERN.ZENODO": { + "openaire_id": "opendoar____::2659", + "datacite_name": "Zenodo", + "official_name": "ZENODO" }, "TIB.PANGAEA": { "openaire_id": "re3data_____::r3d100010134", "datacite_name": "PANGAEA", "official_name": "PANGAEA" }, - "PSU.DATACOM": { - "openaire_id": "re3data_____::r3d100010477", - "datacite_name": "Data Commons", - "official_name": "ANU Data Commons", - "similarity": 0.8571428571428571 - }, - "ANDS.CENTRE72": { - "openaire_id": "re3data_____::r3d100010451", - "datacite_name": "PARADISEC", - "official_name": "Pacific and Regional Archive for Digital Sources in Endangered Cultures" - }, - "BL.OXDB": { - "openaire_id": "re3data_____::r3d100011653", - "datacite_name": "Oxford University Library Service Databank", - "official_name": "DataBank, Bodleian Libraries, University of Oxford" - }, - "BL.STANDREW": { - "openaire_id": "re3data_____::r3d100012411", - "datacite_name": "University of St Andrews", - "official_name": "St Andrews Research portal - Research Data" - }, - "TIB.BAFG": { - "openaire_id": "re3data_____::r3d100011664", - "datacite_name": "Bundesanstalt f\u00fcr Gew\u00e4sserkunde", - "official_name": "Geoportal der BFG" - }, - "CRUI.UNIBO": { - "openaire_id": "re3data_____::r3d100012604", - "datacite_name": "Universit\u00e0 degli Studi di Bologna", - "official_name": "AMS Acta" - }, - "GDCC.ODUM-LIBRARY": { - "openaire_id": "re3data_____::r3d100000005", - "datacite_name": "UNC Libraries", - "official_name": "UNC Dataverse" - }, - "RG.RG": { - "openaire_id": "re3data_____::r3d100012227", - "datacite_name": "ResearchGate", - "official_name": "ResearchGate" - }, - "TIB.EUMETSAT": { - "openaire_id": "re3data_____::r3d100010232", - "datacite_name": "EUMETSAT", - "official_name": "Eumetsat" - }, - "SND.SMHI": { - "openaire_id": "re3data_____::r3d100011776", - "datacite_name": "Swedish Meteorological and Hydrological Institute open data", - "official_name": "Swedish Meteorological and Hydrological Institute open data" - }, - "NOAA.NCEI": { - "openaire_id": "re3data_____::r3d100011801", - "datacite_name": "National Oceanic and Atmospheric Administration (NOAA) National Centers for Environmental Information (NCEI)", - "official_name": "NCEI" - }, - "TIB.WDCC": { - "openaire_id": "re3data_____::r3d100010299", - "datacite_name": "World Data Center for Climate", - "official_name": "World Data Center for Climate" - }, - "CNGB.GIGADB": { - "openaire_id": "re3data_____::r3d100010478", - "datacite_name": "GigaDB", - "official_name": "GigaDB" - }, - "DELFT.VLIZ": { - "openaire_id": "re3data_____::r3d100010661", - "datacite_name": "Vlaams Instituut voor de Zee", - "official_name": "Flanders Marine Institute" - }, - "NUS.SB": { - "openaire_id": "re3data_____::r3d100012564", - "datacite_name": "National University of Singapore", - "official_name": "ScholarBank@NUS" - }, - "EDI.EDI": { - "openaire_id": "re3data_____::r3d100010272", - "datacite_name": "Environmental Data Initiative", - "official_name": "Environmental Data Initiative Repository" - }, - "INIST.ADISP": { - "openaire_id": "re3data_____::r3d100010494", - "datacite_name": "Quetelet PROGEDO Diffusion", - "official_name": "Quetelet PROGEDO Diffusion" - }, - "GESIS.SHARE": { - "openaire_id": "re3data_____::r3d100010430", - "datacite_name": "SHARE - ERIC", - "official_name": "Survey of Health, Ageing and Retirement in Europe" - }, - "ANDS.CENTRE-1": { - "openaire_id": "re3data_____::r3d100010864", - "datacite_name": "Griffith University", - "official_name": "Griffith University Research Data Repository" - }, - "BL.READING": { - "openaire_id": "re3data_____::r3d100012064", - "datacite_name": "University of Reading", - "official_name": "University of Reading Research Data Archive" - }, - "CORNELL.CISER": { - "openaire_id": "re3data_____::r3d100011056", - "datacite_name": "CISER Data Archive", - "official_name": "CISER Data Archive" - }, "DRYAD.DRYAD": { "openaire_id": "re3data_____::r3d100000044", "datacite_name": "DRYAD", "official_name": "DRYAD" }, - "CDL.PISCO": { - "openaire_id": "re3data_____::r3d100010947", - "datacite_name": "Partnership for Interdisciplinary Studies of Coastal Oceans (PISCO)", - "official_name": "Partnership for Interdisciplinary Studies of Coastal Oceans" - }, - "IEEE.DATAPORT": { - "openaire_id": "re3data_____::r3d100012569", - "datacite_name": "IEEE DataPort", - "official_name": "IEEE DataPort" - }, - "DELFT.MAASTRO": { - "openaire_id": "re3data_____::r3d100011086", - "datacite_name": "MAASTRO Clinic", - "official_name": "CancerData.org" - }, - "USGS.PROD": { - "openaire_id": "re3data_____::r3d100010054", - "datacite_name": "USGS DOI Tool Production Environment", - "official_name": "U.S. Geological Survey" - }, - "GDCC.ODUM-DV": { - "openaire_id": "re3data_____::r3d100000005", - "datacite_name": "Odum Institute Dataverse", - "official_name": "UNC Dataverse" - }, - "CDL.SDSCSG": { - "openaire_id": "re3data_____::r3d100011690", - "datacite_name": "UCSD Signaling Gateway", - "official_name": "UCSD Signaling gateway" - }, - "ORBIS.NKN": { - "openaire_id": "re3data_____::r3d100011587", - "datacite_name": "Northwest Knowledge Network", - "official_name": "Northwest Knowledge Network" - }, - "ANDS.CENTRE63": { - "openaire_id": "re3data_____::r3d100010918", - "datacite_name": "Test: Atlas of Living Australia", - "official_name": "Atlas of Living Australia", - "similarity": 0.8928571428571429 - }, - "SML.TALKBANK": { - "openaire_id": "re3data_____::r3d100010887", - "datacite_name": "TalkBank", - "official_name": "TalkBank" - }, - "CORNELL.LIBRARY": { - "openaire_id": "re3data_____::r3d100012322", - "datacite_name": "Cornell University Library", - "official_name": "eCommons - Cornell's digital repository" - }, - "BL.SOTON": { - "openaire_id": "re3data_____::r3d100011245", - "datacite_name": "University of Southampton", - "official_name": "University of Southampton Institutional Research Repository" - }, - "GESIS.DB-BANK": { - "openaire_id": "re3data_____::r3d100012252", - "datacite_name": "Forschungsdaten- und Servicezentrum der Bundesbank", - "official_name": "Forschungsdaten- und Servicezentrum der Bundesbank" - }, - "ANDS.CENTRE68": { - "openaire_id": "re3data_____::r3d100010918", - "datacite_name": "Atlas of Living Australia", - "official_name": "Atlas of Living Australia" - }, - "ANDS.CENTRE69": { - "openaire_id": "re3data_____::r3d100010914", - "datacite_name": "Australian Ocean Data Network", - "official_name": "Australian Ocean Data Network Portal" - }, - "INIST.CDS": { - "openaire_id": "re3data_____::r3d100010584", - "datacite_name": "Strasbourg Astronomical Data Center", - "official_name": "Strasbourg Astronomical Data Center" - }, - "BL.NHM": { - "openaire_id": "re3data_____::r3d100011675", - "datacite_name": "Natural History Museum, London", - "official_name": "Natural History Museum, Data Portal" - }, - "BL.ADS": { - "openaire_id": "re3data_____::r3d100000006", - "datacite_name": "Archaeology Data Service", - "official_name": "Archaeology Data Service" - }, - "GDCC.JHU": { - "openaire_id": "re3data_____::r3d100011836", - "datacite_name": "Johns Hopkins University Library", - "official_name": "Johns Hopkins Data Archive Dataverse Network" - }, - "BL.ED": { - "openaire_id": "re3data_____::r3d100000047", - "datacite_name": "University of Edinburgh", - "official_name": "Edinburgh DataShare" - }, - "BL.EXETER": { - "openaire_id": "re3data_____::r3d100011202", - "datacite_name": "University of Exeter", - "official_name": "Open Research Exeter" - }, - "BL.NCL": { - "openaire_id": "re3data_____::r3d100012408", - "datacite_name": "Newcastle University", - "official_name": "NCL Data" - }, - "BROWN.BDR": { - "openaire_id": "re3data_____::r3d100011654", - "datacite_name": "Brown Digital Repository", - "official_name": "Brown Digital Repository" - }, - "GDCC.SYR-QDR": { - "openaire_id": "re3data_____::r3d100011038", - "datacite_name": "Syracuse University Qualitative Data Repository", - "official_name": "Qualitative Data Repository" - }, - "BL.BRISTOL": { - "openaire_id": "re3data_____::r3d100011099", - "datacite_name": "University of Bristol", - "official_name": "data.bris Research Data Repository" - }, - "DATACITE.DATACITE": { - "openaire_id": "openaire____::datacite", - "datacite_name": "DataCite", - "official_name": "Datacite" - }, - "ESTDOI.KEEL": { - "openaire_id": "re3data_____::r3d100011941", - "datacite_name": "Keeleressursid. The Center of Estonian Language Resources", - "official_name": "Center of Estonian Language Resources" - }, - "BL.ESSEX": { - "openaire_id": "re3data_____::r3d100012405", - "datacite_name": "University of Essex", - "official_name": "Research Data at Essex" - }, - "PURDUE.MDF": { - "openaire_id": "re3data_____::r3d100012080", - "datacite_name": "Univ Chicago Materials Data Facility", - "official_name": "Materials Data Facility" - }, - "DELFT.KNMI": { - "openaire_id": "re3data_____::r3d100011879", - "datacite_name": "KNMI Data Centre", - "official_name": "KNMI Data Centre" - }, - "CUL.CIESIN": { - "openaire_id": "re3data_____::r3d100010207", - "datacite_name": "Center for International Earth Science Information Network", - "official_name": "Center for International Earth Science Information Network" - }, - "WISC.NEOTOMA": { - "openaire_id": "re3data_____::r3d100011761", - "datacite_name": "Neotoma Paleoecological Database", - "official_name": "Neotoma Paleoecology Database", - "similarity": 0.9180327868852459 - }, - "IRIS.IRIS": { - "openaire_id": "re3data_____::r3d100010268", - "datacite_name": "Incorporated Research Institutions for Seismology", - "official_name": "Incorporated Research Institutions for Seismology" - }, - "ANDS.CENTRE50": { - "openaire_id": "re3data_____::r3d100012378", - "datacite_name": "Analysis and Policy Observatory", - "official_name": "Analysis and Policy Observatory" - }, - "FAO.RING": { - "openaire_id": "re3data_____::r3d100012571", - "datacite_name": "CIARD RING", - "official_name": "CIARD Ring" - }, - "CUL.R2R": { - "openaire_id": "re3data_____::r3d100010735", - "datacite_name": "Rolling Deck to Repository", - "official_name": "Rolling Deck to Repository" - }, - "DEMO.GRIIDC": { - "openaire_id": "re3data_____::r3d100011571", - "datacite_name": "Gulf of Mexico Research Initiative Information and Data Cooperative", - "official_name": "Gulf of Mexico Research Initiative Information and Data Cooperative" - }, - "ANDS.CENTRE-6": { - "openaire_id": "re3data_____::r3d100012268", - "datacite_name": "Curtin University", - "official_name": "Curtin University Research Data Collection" - }, - "ANDS.CENTRE-5": { - "openaire_id": "re3data_____::r3d100012013", - "datacite_name": "TERN Central Portal", - "official_name": "TERN Data Discovery portal" - }, "FIGSHARE.UCT": { "openaire_id": "re3data_____::r3d100012633", "datacite_name": "University of Cape Town (UCT)", "official_name": "ZivaHub" }, - "BIBSYS.UIT-ORD": { - "openaire_id": "re3data_____::r3d100012538", - "datacite_name": "DataverseNO", - "official_name": "DataverseNO" - }, - "CISTI.CADC": { - "openaire_id": "re3data_____::r3d100000016", - "datacite_name": "Canadian Astronomy Data Centre", - "official_name": "The Canadian Astronomy Data Centre", - "similarity": 0.9375 - }, - "BL.CCDC": { - "openaire_id": "re3data_____::r3d100010197", - "datacite_name": "The Cambridge Crystallographic Data Centre", - "official_name": "The Cambridge Structural Database" - }, - "BL.UCLD": { - "openaire_id": "re3data_____::r3d100012417", - "datacite_name": "University College London", - "official_name": "UCL Discovery" - }, - "GESIS.RKI": { - "openaire_id": "re3data_____::r3d100010436", - "datacite_name": "'Health Monitoring' Research Data Centre at the Robert Koch Institute", - "official_name": "'Health Monitoring' Research Data Centre at the Robert Koch Institute" - }, - "BL.DRI": { - "openaire_id": "re3data_____::r3d100011805", - "datacite_name": "Digital Repository of Ireland", - "official_name": "Digital Repository of Ireland" - }, - "TIB.KIT-IMK": { - "openaire_id": "re3data_____::r3d100011956", - "datacite_name": "Institute for Meteorology and Climate Research - Atmospheric Trace Gases and Remote Sensing", - "official_name": "CARIBIC" - }, - "DOINZ.LANDCARE": { - "openaire_id": "re3data_____::r3d100011662", - "datacite_name": "Landcare Research New Zealand Ltd", - "official_name": "Landcare Research Data Repository" - }, - "DEMO.EMORY": { - "openaire_id": "re3data_____::r3d100011559", - "datacite_name": "The Cancer Imaging Archive", - "official_name": "The Cancer Imaging Archive" - }, - "UMN.DRUM": { - "openaire_id": "re3data_____::r3d100011393", - "datacite_name": "Data Repository for the University of Minnesota", - "official_name": "Data Repository for the University of Minnesota" - }, - "CISTI.SFU": { - "openaire_id": "re3data_____::r3d100012512", - "datacite_name": "Simon Fraser University", - "official_name": "SFU Radar" - }, - "GESIS.ICPSR": { - "openaire_id": "re3data_____::r3d100010255", - "datacite_name": "ICPSR", - "official_name": "Inter-university Consortium for Political and Social Research" - }, - "ANDS.CENTRE49": { - "openaire_id": "re3data_____::r3d100012145", - "datacite_name": "The University of Melbourne", - "official_name": "melbourne.figshare.com" - }, - "ZBW.IFO": { - "openaire_id": "re3data_____::r3d100010201", - "datacite_name": "LMU-ifo Economics & Business Data Center", - "official_name": "LMU-ifo Economics & Business Data Center" - }, - "TIB.BEILST": { - "openaire_id": "re3data_____::r3d100012329", - "datacite_name": "Beilstein-Institut zur F\u00f6rderung der Chemischen Wissenschaften", - "official_name": "STRENDA DB" - }, - "ZBW.ZBW-JDA": { - "openaire_id": "re3data_____::r3d100012190", - "datacite_name": "ZBW Journal Data Archive", - "official_name": "ZBW Journal Data Archive" - }, - "BL.UKDA": { - "openaire_id": "re3data_____::r3d100010215", - "datacite_name": "UK Data Archive", - "official_name": "UK Data Archive" - }, - "CERN.INSPIRE": { - "openaire_id": "re3data_____::r3d100011077", - "datacite_name": "inspirehep.net", - "official_name": "Inspire-HEP" - }, - "CISTI.OTNDC": { - "openaire_id": "re3data_____::r3d100012083", - "datacite_name": "Ocean Tracking Network", - "official_name": "Ocean Tracking Network" - }, - "CISTI.CC": { - "openaire_id": "re3data_____::r3d100012646", - "datacite_name": "Compute Canada", - "official_name": "Federated Research Data Repository" - }, - "SND.ICOS": { - "openaire_id": "re3data_____::r3d100012203", - "datacite_name": "ICOS Carbon Portal", - "official_name": "ICOS Carbon Portal" - }, - "BL.MENDELEY": { - "openaire_id": "re3data_____::r3d100011868", - "datacite_name": "Mendeley", - "official_name": "Mendeley Data" - }, - "DELFT.UU": { - "openaire_id": "re3data_____::r3d100011201", - "datacite_name": "Universiteit Utrecht", - "official_name": "DataverseNL" - }, - "GESIS.DSZ-BO": { - "openaire_id": "re3data_____::r3d100010439", - "datacite_name": "Data Service Center for Business and Organizational Data", - "official_name": "Data Service Center for Business and Organizational Data" - }, - "TIB.IPK": { - "openaire_id": "re3data_____::r3d100011647", - "datacite_name": "IPK Gatersleben", - "official_name": "IPK Gatersleben" - }, - "GDCC.HARVARD-DV": { - "openaire_id": "re3data_____::r3d100010051", - "datacite_name": "Harvard IQSS Dataverse", - "official_name": "Harvard Dataverse" - }, - "BL.LEEDS": { - "openaire_id": "re3data_____::r3d100011945", - "datacite_name": "University of Leeds", - "official_name": "Research Data Leeds Repository" - }, - "BL.BRUNEL": { - "openaire_id": "re3data_____::r3d100012140", - "datacite_name": "Brunel University London", - "official_name": "Brunel figshare" - }, - "DEMO.ENVIDAT": { - "openaire_id": "re3data_____::r3d100012587", - "datacite_name": "EnviDat", - "official_name": "EnviDat" - }, - "GDCC.NTU": { - "openaire_id": "re3data_____::r3d100012440", - "datacite_name": "Nanyang Technological University", - "official_name": "DR-NTU (Data)" - }, - "UNM.DATAONE": { - "openaire_id": "re3data_____::r3d100000045", - "datacite_name": "DataONE", - "official_name": "DataONE" - }, - "CSC.NRD": { - "openaire_id": "re3data_____::r3d100012157", - "datacite_name": "Ministry of Culture and Education", - "official_name": "IDA Research Data Storage Service" - }, - "GESIS.DIPF": { - "openaire_id": "re3data_____::r3d100010390", - "datacite_name": "Research Data Centre for Education", - "official_name": "Research Data Centre for Education" - }, - "BL.HALLAM": { - "openaire_id": "re3data_____::r3d100011909", - "datacite_name": "Sheffield Hallam University", - "official_name": "Sheffield Hallam University Research Data Archive" - }, - "BL.LSHTM": { - "openaire_id": "re3data_____::r3d100011800", - "datacite_name": "London School of Hygiene and Tropical Medicine", - "official_name": "LSHTM Data Compass" - }, - "SUBGOE.DARIAH": { - "openaire_id": "re3data_____::r3d100011345", - "datacite_name": "Digital Research Infrastructure for the Arts and Humanities", - "official_name": "DARIAH-DE Repository" - }, - "SND.SU": { - "openaire_id": "re3data_____::r3d100012147", - "datacite_name": "Stockholm University", - "official_name": "Stockholm University repository for data" - }, - "GESIS.INDEPTH": { - "openaire_id": "re3data_____::r3d100011392", - "datacite_name": "INDEPTH Network", - "official_name": "INDEPTH Data Repository" - }, - "TIB.FLOSS": { - "openaire_id": "re3data_____::r3d100010863", - "datacite_name": "FLOSS Project, Syracuse University", - "official_name": "FLOSSmole" - }, - "ETHZ.WGMS": { - "openaire_id": "re3data_____::r3d100010627", - "datacite_name": "World Glacier Monitoring Service", - "official_name": "World Glacier Monitoring Service" - }, - "BL.UEL": { - "openaire_id": "re3data_____::r3d100012414", - "datacite_name": "University of East London", - "official_name": "Data.uel" - }, - "DELFT.DATA4TU": { - "openaire_id": "re3data_____::r3d100010216", - "datacite_name": "4TU.Centre for Research Data", - "official_name": "4TU.Centre for Research Data" - }, - "GESIS.IANUS": { - "openaire_id": "re3data_____::r3d100012361", - "datacite_name": "IANUS - FDZ Arch\u00e4ologie & Altertumswissenschaften", - "official_name": "IANUS Datenportal" - }, - "CDL.UCSDCCA": { - "openaire_id": "re3data_____::r3d100011655", - "datacite_name": "California Coastal Atlas", - "official_name": "California Coastal Atlas" - }, - "VIVA.VT": { - "openaire_id": "re3data_____::r3d100012601", - "datacite_name": "Virginia Tech", - "official_name": "VTechData" - }, - "ANDS.CENTRE39": { - "openaire_id": "re3data_____::r3d100011640", - "datacite_name": "University of the Sunshine Coast", - "official_name": "USC Research Bank research data" - }, - "DEMO.OPENKIM": { - "openaire_id": "re3data_____::r3d100011864", - "datacite_name": "OpenKIM", - "official_name": "OpenKIM" - }, - "INIST.OTELO": { - "openaire_id": "re3data_____::r3d100012505", - "datacite_name": "Observatoire Terre Environnement de Lorraine", - "official_name": "ORDaR" - }, - "INIST.ILL": { - "openaire_id": "re3data_____::r3d100012072", - "datacite_name": "Institut Laue-Langevin", - "official_name": "ILL Data Portal" - }, - "ANDS.CENTRE31": { - "openaire_id": "re3data_____::r3d100012378", - "datacite_name": "Test: Analysis and Policy Observatory", - "official_name": "Analysis and Policy Observatory", - "similarity": 0.9117647058823529 - }, - "ANDS.CENTRE30": { - "openaire_id": "re3data_____::r3d100010917", - "datacite_name": "Test: Geoscience Australia", - "official_name": "Geoscience Australia", - "similarity": 0.8695652173913043 - }, - "BL.SALFORD": { - "openaire_id": "re3data_____::r3d100012144", - "datacite_name": "University of Salford", - "official_name": "University of Salford Data Repository" - }, - "CERN.HEPDATA": { - "openaire_id": "re3data_____::r3d100010081", - "datacite_name": "HEPData.net", - "official_name": "HEPData" - }, - "ETHZ.E-COLL": { - "openaire_id": "re3data_____::r3d100012557", - "datacite_name": "ETH Z\u00fcrich Research Collection", - "official_name": "ETH Z\u00fcrich Research Collection" - }, - "GBIF.GBIF": { - "openaire_id": "re3data_____::r3d100000039", - "datacite_name": "Global Biodiversity Information Facility", - "official_name": "Global Biodiversity Information Facility" - }, - "ORNLDAAC.DAAC": { - "openaire_id": "re3data_____::r3d100000037", - "datacite_name": "Oak Ridge National Laboratory Distributed Active Archive Center", - "official_name": "Oak Ridge National Laboratory Distributed Active Archive Center for Biogeochemical Dynamics" - }, - "KAUST.KAUSTREPO": { - "openaire_id": "re3data_____::r3d100011898", - "datacite_name": "KAUST Research Repository", - "official_name": "UWA Research Repository", - "similarity": 0.875 - }, - "ZBW.ZEW": { - "openaire_id": "re3data_____::r3d100010399", - "datacite_name": "Zentrum f\u00fcr Europ\u00e4ische Wirtschaftsforschung GmbH (ZEW)", - "official_name": "ZEW Forschungsdatenzentrum" - }, - "SML.TDAR": { - "openaire_id": "re3data_____::r3d100010347", - "datacite_name": "Digital Antiquity (TDAR)", - "official_name": "tDAR" - }, - "GESIS.CSDA": { - "openaire_id": "re3data_____::r3d100010484", - "datacite_name": "Czech Social Science Data Archive", - "official_name": "Czech Social Science Data Archive" - }, - "SND.BOLIN": { - "openaire_id": "re3data_____::r3d100011699", - "datacite_name": "Bolin Centre Database", - "official_name": "Bolin Centre Database" - }, - "MLA.HC": { - "openaire_id": "re3data_____::r3d100012309", - "datacite_name": "Humanities Commons", - "official_name": "Humanities Commons" - }, - "CDL.IDASHREP": { - "openaire_id": "re3data_____::r3d100010382", - "datacite_name": "iDASH Repository", - "official_name": "IDS Repository", - "similarity": 0.8666666666666667 - }, - "ZBMED.SNSB": { - "openaire_id": "re3data_____::r3d100011873", - "datacite_name": "Staatliche Naturwissenschaftliche Sammlungen Bayerns", - "official_name": "Staatliche Naturwissenschaftliche Sammlungen Bayerns - datasets", - "similarity": 0.9043478260869565 - }, - "ORBIS.OHSU": { - "openaire_id": "re3data_____::r3d100012244", - "datacite_name": "Oregon Health Sciences University", - "official_name": "OHSU Digital Commons" - }, - "DARTLIB.CRAWDAD": { - "openaire_id": "re3data_____::r3d100010716", - "datacite_name": "CRAWDAD", - "official_name": "CRAWDAD" - }, - "CDL.CCHDO": { - "openaire_id": "re3data_____::r3d100010831", - "datacite_name": "CLIVAR and Carbon Hydrographic Data Office", - "official_name": "Climate Variability and Predictability and Carbon Hydrographic Data Office" - }, - "GESIS.AUSSDA": { - "openaire_id": "re3data_____::r3d100010483", - "datacite_name": "Austrian Social Science Data Archive", - "official_name": "AUSSDA" - }, - "NSIDC.DATACTR": { - "openaire_id": "re3data_____::r3d100010110", - "datacite_name": "National Snow and Ice Data Center", - "official_name": "National Snow and Ice Data Center" - }, - "TIB.RADAR": { - "openaire_id": "re3data_____::r3d100012330", - "datacite_name": "FIZ Karlsruhe \u2013 Leibniz-Institut f\u00fcr Informationsinfrastruktur", - "official_name": "RADAR" - }, - "KIM.OPENKIM": { - "openaire_id": "re3data_____::r3d100011864", - "datacite_name": "Open Knowledgebase of Interatomic Models (OpenKIM)", - "official_name": "OpenKIM" - }, - "BL.LBORO": { - "openaire_id": "re3data_____::r3d100012143", - "datacite_name": "Loughborough University", - "official_name": "Loughborough Data Repository" - }, - "GESIS.ZPID": { - "openaire_id": "re3data_____::r3d100010328", - "datacite_name": "GESIS.ZPID", - "official_name": "PsychData" - }, - "SML.TCIA": { - "openaire_id": "re3data_____::r3d100011559", - "datacite_name": "The Cancer Imaging Archive", - "official_name": "The Cancer Imaging Archive" - }, - "CDL.IRIS": { - "openaire_id": "re3data_____::r3d100010268", - "datacite_name": "Incorporated Research Institutions for Seismology", - "official_name": "Incorporated Research Institutions for Seismology" - }, - "BIBSYS.NMDC": { - "openaire_id": "re3data_____::r3d100012291", - "datacite_name": "Norwegian Marine Data Centre", - "official_name": "Norwegian Polar Data Centre", - "similarity": 0.8727272727272727 - }, - "ANDS.CENTRE25": { - "openaire_id": "re3data_____::r3d100010917", - "datacite_name": "Geoscience Australia", - "official_name": "Geoscience Australia" - }, - "BL.UCLAN": { - "openaire_id": "re3data_____::r3d100012019", - "datacite_name": "University of Central Lancashire", - "official_name": "UCLanData" - }, - "ANDS.CENTRE23": { - "openaire_id": "re3data_____::r3d100011898", - "datacite_name": "The University of Western Australia", - "official_name": "UWA Research Repository" - }, - "CISTI.WOUDC": { - "openaire_id": "re3data_____::r3d100010367", - "datacite_name": "World Ozone and Ultraviolet Radiation Data Centre", - "official_name": "World Ozone and Ultraviolet Radiation Data Centre" - }, - "FIGSHARE.ARS": { - "openaire_id": "re3data_____::r3d100010066", - "datacite_name": "figshare Academic Research System", - "official_name": "figshare" - }, - "ILLINOIS.DATABANK": { - "openaire_id": "re3data_____::r3d100012001", - "datacite_name": "Illinois Data Bank", - "official_name": "Illinois Data Bank" - }, - "BL.ECMWF": { - "openaire_id": "re3data_____::r3d100011726", - "datacite_name": "European Centre for Medium-Range Weather Forecasts", - "official_name": "European Centre for Medium-Range Weather Forecasts" - }, - "CDL.ISSDA": { - "openaire_id": "re3data_____::r3d100010497", - "datacite_name": "Irish Social Science Data Archive (ISSDA)", - "official_name": "Irish Social Science Data Archive" - }, - "CDL.PQR": { - "openaire_id": "re3data_____::r3d100012225", - "datacite_name": "Pitt Quantum Repository", - "official_name": "Pitt Quantum Repository" - }, - "ANDS.CENTRE82": { - "openaire_id": "re3data_____::r3d100010138", - "datacite_name": "Test: Australian Data Archive", - "official_name": "Australian Data Archive", - "similarity": 0.8846153846153846 - }, - "GDCC.HARVARD-SLP": { - "openaire_id": "re3data_____::r3d100011861", - "datacite_name": "National Sleep Research Resource", - "official_name": "National Sleep Research Resource" - }, - "CDL.IMMPORT": { - "openaire_id": "re3data_____::r3d100012529", - "datacite_name": "UCSF ImmPort", - "official_name": "ImmPort" - }, - "GESIS.FID": { - "openaire_id": "re3data_____::r3d100012347", - "datacite_name": "FID f\u00fcr internationale und interdisziplin\u00e4re Rechtsforschung", - "official_name": "\u00b2Dok[\u00a7]" - }, - "OCEAN.OCEAN": { - "openaire_id": "re3data_____::r3d100012369", - "datacite_name": "Code Ocean", - "official_name": "Code Ocean" - }, - "CERN.ZENODO": { - "openaire_id": "re3data_____::r3d100010468", - "datacite_name": "Zenodo", - "official_name": "Zenodo" - }, - "ETHZ.DA-RD": { - "openaire_id": "re3data_____::r3d100011626", - "datacite_name": "ETHZ Data Archive - Research Data", - "official_name": "ETH Data Archive" - }, - "SND.ECDS": { - "openaire_id": "re3data_____::r3d100011000", - "datacite_name": "Environment Climate Data Sweden", - "official_name": "Environment Climate Data Sweden" - }, - "BL.BATH": { - "openaire_id": "re3data_____::r3d100011947", - "datacite_name": "University of Bath", - "official_name": "University of Bath Research Data Archive" - }, - "TIB.LDEO": { - "openaire_id": "re3data_____::r3d100012547", - "datacite_name": "LDEO - Lamont-Doherty Earth Observatory, Columbia University", - "official_name": "Lamont-Doherty Core Repository" - }, - "COS.OSF": { - "openaire_id": "re3data_____::r3d100011137", - "datacite_name": "Open Science Framework", - "official_name": "Open Science Framework" - }, - "ESTDOI.REPO": { - "openaire_id": "re3data_____::r3d100012333", - "datacite_name": "DataDOI", - "official_name": "DataDOI" - }, - "CDL.NSFADC": { - "openaire_id": "re3data_____::r3d100011973", - "datacite_name": "NSF Arctic Data Center", - "official_name": "NSF Arctic Data Center" - }, - "ANDS.CENTRE13": { - "openaire_id": "re3data_____::r3d100010477", - "datacite_name": "The Australian National University", - "official_name": "ANU Data Commons" - }, - "BL.NERC": { - "openaire_id": "re3data_____::r3d100010199", - "datacite_name": "Natural Environment Research Council", - "official_name": "Environmental Information Data Centre" - }, - "SAGEBIO.SYNAPSE": { - "openaire_id": "re3data_____::r3d100011894", - "datacite_name": "Synapse", - "official_name": "Synapse" - }, - "ANDS.CENTRE15": { - "openaire_id": "re3data_____::r3d100000038", - "datacite_name": "Australian Antarctic Division", - "official_name": "Australian Antarctic Data Centre" - }, - "WISC.BMRB": { - "openaire_id": "re3data_____::r3d100010191", - "datacite_name": "Biological Magnetic Resonance Bank", - "official_name": "Biological Magnetic Resonance Data Bank", - "similarity": 0.9315068493150684 - }, - "STSCI.MAST": { - "openaire_id": "re3data_____::r3d100010403", - "datacite_name": "Barbara A. Mikulski Archive for Space Telescopes", - "official_name": "Barbara A. Mikulski Archive for Space Telescopes" - }, - "CDL.NSIDC": { - "openaire_id": "re3data_____::r3d100010110", - "datacite_name": "National Snow and Ice Data Center", - "official_name": "National Snow and Ice Data Center" - }, - "BL.STRATH": { - "openaire_id": "re3data_____::r3d100012412", - "datacite_name": "University of Strathclyde", - "official_name": "University of Strathclyde KnowledgeBase Datasets" - }, - "DEMO.TDAR": { - "openaire_id": "re3data_____::r3d100010347", - "datacite_name": "The Digital Archaeological Record (tDAR)", - "official_name": "tDAR" - }, - "TIND.CALTECH": { - "openaire_id": "re3data_____::r3d100012384", - "datacite_name": "CaltechDATA", - "official_name": "CaltechDATA" - }, - "GESIS.BIBB-FDZ": { - "openaire_id": "re3data_____::r3d100010190", - "datacite_name": "Forschungsdatenzentrum im Bundesinstitut f\u00fcr Berufsbildung", - "official_name": "Forschungsdatenzentrum im Bundesinstitut f\u00fcr Berufsbildung" - }, - "ANDS.CENTRE87": { - "openaire_id": "re3data_____::r3d100010138", - "datacite_name": "Australian Data Archive", - "official_name": "Australian Data Archive" - }, - "GESIS.NEPS": { - "openaire_id": "re3data_____::r3d100010736", - "datacite_name": "Nationales Bildungspanel (National Educational Panel Study, NEPS)", - "official_name": "Nationales Bildungspanel" - }, - "CDL.UCBCRCNS": { - "openaire_id": "re3data_____::r3d100011269", - "datacite_name": "Collaborative Research in Computational Neuroscience (CRCNS)", - "official_name": "Collaborative Research in Computational Neuroscience" - }, - "TIB.UKON": { - "openaire_id": "re3data_____::r3d100010469", - "datacite_name": "Movebank", - "official_name": "Movebank" - }, - "UMN.IPUMS": { - "openaire_id": "re3data_____::r3d100010794", - "datacite_name": "Minnesota Population Center", - "official_name": "Minnesota Population Center" - }, - "TIB.BIKF": { - "openaire_id": "re3data_____::r3d100012379", - "datacite_name": "Senckenberg Data & Metadata Repository", - "official_name": "Senckenberg Data & Metadata Repository" - }, - "TDL.GRIIDC": { - "openaire_id": "re3data_____::r3d100011571", - "datacite_name": "Gulf of Mexico Research Initiative Information and Data Cooperative", - "official_name": "Gulf of Mexico Research Initiative Information and Data Cooperative" - }, - "DELFT.NIBG": { - "openaire_id": "re3data_____::r3d100012167", - "datacite_name": "Sound and Vision", - "official_name": "Sound and Vision" - }, - "BL.SURREY": { - "openaire_id": "re3data_____::r3d100012232", - "datacite_name": "University of Surrey", - "official_name": "Surrey Research Insight" - }, - "OSTI.ORNLNGEE": { - "openaire_id": "re3data_____::r3d100011676", - "datacite_name": "NGEE-Arctic (Next Generation Ecosystems Experiement)", - "official_name": "NGEE Arctic" - }, - "TIB.WDCRSAT": { - "openaire_id": "re3data_____::r3d100010156", - "datacite_name": "World Data Center for Remote Sensing of the Atmosphere", - "official_name": "The World Data Center for Remote Sensing of the Atmosphere", - "similarity": 0.9642857142857143 - }, - "ZBMED.DSMZ": { - "openaire_id": "re3data_____::r3d100010219", - "datacite_name": "DSMZ", - "official_name": "DSMZ" - }, - "DOINZ.NZAU": { - "openaire_id": "re3data_____::r3d100012110", - "datacite_name": "University of Auckland Data Publishing and Discovery Service", - "official_name": "University of Auckland Data Repository" - }, - "INIST.RESIF": { - "openaire_id": "re3data_____::r3d100012222", - "datacite_name": "R\u00e9seau sismologique et g\u00e9od\u00e9sique fran\u00e7ais", - "official_name": "RESIF Seismic Data Portal" - }, - "CDL.NCEAS": { - "openaire_id": "re3data_____::r3d100010093", - "datacite_name": "National Center for Ecological Analysis and Synthesis (NCEAS)", - "official_name": "National Center for Ecological Analysis and Synthesis Data Repository" - }, - "ZBMED.EMP": { - "openaire_id": "re3data_____::r3d100010234", - "datacite_name": "eyeMoviePedia", - "official_name": "eyeMoviePedia" - }, - "ZBMED.BIOFRESH": { - "openaire_id": "re3data_____::r3d100011651", - "datacite_name": "Project BioFresh, Leibniz-Institute of Freshwater Ecology and Inland Fisheries", - "official_name": "Freshwater Biodiversity Data Portal" - }, - "INIST.IFREMER": { - "openaire_id": "re3data_____::r3d100011867", - "datacite_name": "Institut Fran\u00e7ais de Recherche pour l'Exploitation de la Mer", - "official_name": "SEANOE" - }, - "ETHZ.SICAS": { - "openaire_id": "re3data_____::r3d100011560", - "datacite_name": "SICAS", - "official_name": "Sicas Medical Image Repository" - }, - "SND.SND": { - "openaire_id": "re3data_____::r3d100010146", - "datacite_name": "Swedish National Data Service", - "official_name": "Swedish National Data Service" - }, - "DELFT.EASY": { - "openaire_id": "re3data_____::r3d100011201", - "datacite_name": "DANS", - "official_name": "DataverseNL" - }, - "WH.WHOAS": { - "openaire_id": "re3data_____::r3d100010423", - "datacite_name": "Woods Hole Open Access Server", - "official_name": "Woods Hole Open Access Server" - }, - "DATACITE.UCSC": { - "openaire_id": "re3data_____::r3d100010243", - "datacite_name": "UCSC Genome Browser", - "official_name": "UCSC Genome Browser" + "CSIC.DIGITAL": { + "openaire_id": "re3data_____::r3d100011076", + "datacite_name": "Digital CSIC", + "official_name": "DIGITAL.CSIC" } } \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index a0b7cd95e..48d0f1497 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -19,11 +19,33 @@ import java.time.chrono.ThaiBuddhistDate import java.time.format.DateTimeFormatter import java.util.{Date, Locale} import scala.collection.JavaConverters._ +import scala.io.{Codec, Source} object DataciteToOAFTransformation { + case class HostedByMapType( + openaire_id: String, + datacite_name: String, + official_name: String, + similarity: Option[Float] + ) {} + val mapper = new ObjectMapper() + val unknown_repository: HostedByMapType = HostedByMapType( + ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID, + ModelConstants.UNKNOWN_REPOSITORY.getValue, + ModelConstants.UNKNOWN_REPOSITORY.getValue, + Some(1.0f) + ) + + val hostedByMap: Map[String, HostedByMapType] = { + val s = Source.fromInputStream(getClass.getResourceAsStream("hostedBy_map.json")).mkString + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: org.json4s.JValue = parse(s) + json.extract[Map[String, HostedByMapType]] + } + /** This method should skip record if json contains invalid text * defined in gile datacite_filter * @@ -534,12 +556,9 @@ object DataciteToOAFTransformation { if (client.isDefined) { - instance.setHostedby( - OafMapperUtils.keyValue( - generateDSId(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID), - ModelConstants.UNKNOWN_REPOSITORY.getValue - ) - ) + val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository) + instance.setHostedby(OafMapperUtils.keyValue(generateDSId(hb.openaire_id), hb.official_name)) + instance.setCollectedfrom(DATACITE_COLLECTED_FROM) instance.setUrl(List(s"https://dx.doi.org/$doi").asJava) instance.setAccessright(access_rights_qualifier) From f0ea2410e5ef3250768ad6d78a24ea8c6aefabd3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 21 Jan 2022 10:50:34 +0100 Subject: [PATCH 149/176] improved mapping titles from datacite records to consider title types --- .../dhp/oa/graph/raw/OdfToOafMapper.java | 27 ++++++++++++++++--- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 20 ++++++++++++++ .../dnetlib/dhp/oa/graph/raw/odf_dataset.xml | 3 ++- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 194715295..639c1ab30 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; @@ -11,8 +12,11 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; +import org.dom4j.Element; import org.dom4j.Node; +import com.google.common.collect.Lists; + import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.*; @@ -34,10 +38,25 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareTitles(final Document doc, final DataInfo info) { - return prepareListStructProps( - doc, - "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']", - MAIN_TITLE_QUALIFIER, info); + + final List title = Lists.newArrayList(); + final String xpath = "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']"; + + for (Object o : doc.selectNodes(xpath)) { + Element e = (Element) o; + final String titleValue = e.getTextTrim(); + final String titleType = e.attributeValue("titleType"); + if (StringUtils.isNotBlank(titleType)) { + title + .add( + structuredProperty( + titleValue, titleType, titleType, DNET_DATACITE_TITLE, DNET_DATACITE_TITLE, info)); + } else { + title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER, info)); + } + } + + return title; } @Override diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index de79b750a..6a3414b7c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -10,6 +10,7 @@ import java.io.IOException; import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; @@ -355,6 +356,25 @@ class MappersTest { assertTrue(r2.getValidated()); assertEquals("2020-01-01", r1.getValidationDate()); assertEquals("2020-01-01", r2.getValidationDate()); + + assertNotNull(d.getTitle()); + assertEquals(2, d.getTitle().size()); + verifyTitle(d, "main title", "Temperature and ADCP data collected on Lake Geneva between 2015 and 2017"); + verifyTitle(d, "Subtitle", "survey"); + } + + private void verifyTitle(Dataset d, String titleType, String title) { + Optional + .of( + d + .getTitle() + .stream() + .filter(t -> titleType.equals(t.getQualifier().getClassid())) + .collect(Collectors.toList())) + .ifPresent(t -> { + assertEquals(1, t.size()); + assertEquals(title, t.get(0).getValue()); + }); } @Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml index 31de2e45b..4f41ee6ea 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml @@ -54,7 +54,8 @@ - Temperature and ADCP data collected on Lake Geneva between 2015 and 2017 + Temperature and ADCP data collected on Lake Geneva between 2015 and 2017 + survey Zenodo 2019 From dd52bf1bb8c1b3732405cb5d100d53d5f062d934 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 21 Jan 2022 13:59:29 +0100 Subject: [PATCH 150/176] copy relations to the graphOutputPath --- .../dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml index 883d0e1fb..f77b46105 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml @@ -283,7 +283,16 @@ - + + + + + ${nameNode}/${graphBasePath}/relation + ${nameNode}/${graphOutputPath}/relation + + + + \ No newline at end of file From 2f385b3ac6b7ebdb781ac568cbb0ffa366d9e351 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 21 Jan 2022 13:59:46 +0100 Subject: [PATCH 151/176] updated dnet workflow profile definitions --- .../00_beta_graph_complete_experiment.xml | 111 ++++++++++++----- .../dhp/provision/00_beta_graph_for_IIS.xml | 87 ++++++++++++- .../dhp/provision/00_prod_graph_for_IIS.xml | 53 +++++++- .../dnetlib/dhp/provision/02_beta_graph.xml | 115 ++++++++++++++++-- .../dnetlib/dhp/provision/02_prod_graph.xml | 99 +++++++++++++-- 5 files changed, 398 insertions(+), 67 deletions(-) diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml index c10dd4e99..82cf9d3d5 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml @@ -4,7 +4,7 @@ - + Graph processing [EXPERIMENT] @@ -15,7 +15,7 @@ set the path of unresolved entities unresolvedEntityPath - /data/unresolved_BETA + /data/unresolved_BETA/content @@ -51,6 +51,16 @@ + + set the number of iteration in affiliation propagation + + iterations + 1 + + + + + Set the target path to store the MERGED graph @@ -91,11 +101,21 @@ + + Set the target path to store the GROUPED graph + + groupedGraphPath + /tmp/beta_experiment/graph/05_graph_grouped + + + + + Set the target path to store the INFERRED graph inferredGraphPath - /tmp/beta_experiment/graph/05_graph_inferred + /tmp/beta_experiment/graph/06_graph_inferred @@ -105,7 +125,7 @@ Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/beta_experiment/graph/06_graph_dedup + /tmp/beta_experiment/graph/07_graph_dedup @@ -115,7 +135,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/beta_experiment/graph/07_graph_consistent + /tmp/beta_experiment/graph/08_graph_consistent @@ -125,7 +145,7 @@ Set the target path to store the ORCID enriched graph orcidGraphPath - /tmp/beta_experiment/graph/08_graph_orcid + /tmp/beta_experiment/graph/09_graph_orcid @@ -135,7 +155,7 @@ Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/beta_experiment/graph/09_graph_bulktagging + /tmp/beta_experiment/graph/10_graph_bulktagging @@ -145,7 +165,7 @@ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/beta_experiment/graph/10_graph_affiliation + /tmp/beta_experiment/graph/11_graph_affiliation @@ -155,7 +175,7 @@ Set the target path to store the AFFILIATION from SEMATIC RELATION graph affiliationSemRelGraphPath - /tmp/beta_experiment/graph/11_graph_affiliationsr + /tmp/beta_experiment/graph/12_graph_affiliationsr @@ -165,7 +185,7 @@ Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/beta_experiment/graph/12_graph_community_organization + /tmp/beta_experiment/graph/13_graph_community_organization @@ -175,7 +195,7 @@ Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - /tmp/beta_experiment/graph/13_graph_funding + /tmp/beta_experiment/graph/14_graph_funding @@ -185,7 +205,7 @@ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - /tmp/beta_experiment/graph/14_graph_community_sem_rel + /tmp/beta_experiment/graph/15_graph_community_sem_rel @@ -195,7 +215,7 @@ Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/beta_experiment/graph/15_graph_country + /tmp/beta_experiment/graph/16_graph_country @@ -205,7 +225,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/beta_experiment/graph/16_graph_cleaned + /tmp/beta_experiment/graph/17_graph_cleaned @@ -215,7 +235,7 @@ Set the target path to store the blacklisted graph blacklistedGraphPath - /tmp/beta_experiment/graph/17_graph_blacklisted + /tmp/beta_experiment/graph/18_graph_blacklisted @@ -548,14 +568,14 @@ 'mongoURL' : 'mongodb://beta.services.openaire.eu', 'mongoDb' : 'mdstore', 'mdstoreManagerUrl' : 'https://beta.services.openaire.eu/mdstoremanager', - 'postgresURL' : '', + 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', 'postgresUser' : '', 'postgresPassword' : '', - 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsURL' : 'jdbc:postgresql://10.19.65.40:5432/oa_organizations', 'postgresOpenOrgsUser' : '', 'postgresOpenOrgsPassword' : '', 'shouldHashId' : 'true', - 'importOpenorgs' : 'true', + 'importOpenorgs' : 'false', 'workingDir' : '/tmp/beta_experiment/working_dir/beta_aggregator' } @@ -594,10 +614,10 @@ 'mongoURL' : 'mongodb://services.openaire.eu', 'mongoDb' : 'mdstore', 'mdstoreManagerUrl' : 'https://services.openaire.eu/mdstoremanager', - 'postgresURL' : '', + 'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus', 'postgresUser' : '', 'postgresPassword' : '', - 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsURL' : 'jdbc:postgresql://10.19.65.39:5432/oa_organizations', 'postgresOpenOrgsUser' : '', 'postgresOpenOrgsPassword' : '', 'shouldHashId' : 'true', @@ -737,11 +757,11 @@ executeOozieJob IIS - { + { 'graphBasePath':'cleanedFirstGraphPath', 'unresolvedPath' :'unresolvedEntityPath', 'targetPath':'resolvedGraphPath' - } + } { @@ -752,6 +772,30 @@ } + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'resolvedGraphPath', + 'targetPath':'groupedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/grouping', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '7G' + } + + @@ -867,9 +911,9 @@ import_mdstore_service_location import_dataset_mdstore_ids_csv oozie.wf.application.path - /lib/iis/primary/snapshots/2021-09-24 + /lib/iis/primary/snapshots/2021-12-09 IIS - /tmp/beta_inference/graph/07_graph_cleaned + deprecated - not used import_infospace_graph_location import_project_concepts_context_ids_csv @@ -908,7 +952,7 @@ 'import_islookup_service_location' : 'import_islookup_service_location', 'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv', 'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv', - 'import_infospace_graph_location' : 'import_infospace_graph_location', + 'import_infospace_graph_location' : 'groupedGraphPath', 'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations', 'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets', @@ -958,7 +1002,7 @@ { 'inputActionSetIds' : 'actionSetIdsIISGraph', - 'inputGraphRootPath' : 'resolvedGraphPath', + 'inputGraphRootPath' : 'groupedGraphPath', 'outputGraphRootPath' : 'inferredGraphPath', 'isLookupUrl' : 'isLookUpUrl' } @@ -1125,7 +1169,8 @@ { 'sourcePath' : 'affiliationGraphPath', - 'outputPath': 'affiliationSemRelGraphPath' + 'outputPath': 'affiliationSemRelGraphPath', + 'iterations':'iterations' } @@ -1283,7 +1328,7 @@ { 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/blacklist/oozie_app', 'workingDir' : '/tmp/beta_experiment/working_dir/blacklist', - 'postgresURL' : '', + 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', 'postgresUser' : '', 'postgresPassword' : '' } @@ -1296,10 +1341,10 @@ - wf_20211206_093743_83 - 2021-12-06T10:12:32+00:00 - SUCCESS - + wf_20220111_200505_785 + 2022-01-11T20:08:53+00:00 + + - + \ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml index 2fed35f44..df9528f4c 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml @@ -11,6 +11,16 @@ IIS 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_BETA/content + + + + + set blacklist of funder nsPrefixes from the beta aggregator @@ -71,11 +81,31 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_inference/graph/04_graph_resolved + + + + + + + Set the target path to store the GROUPED graph + + groupedGraphPath + /tmp/beta_inference/graph/05_graph_grouped + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/beta_inference/graph/04_graph_dedup + /tmp/beta_inference/graph/06_graph_dedup @@ -85,7 +115,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/beta_inference/graph/05_graph_consistent + /tmp/beta_inference/graph/07_graph_consistent @@ -95,7 +125,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/beta_inference/graph/06_graph_cleaned + /tmp/beta_inference/graph/08_graph_cleaned @@ -548,6 +578,55 @@ build-report + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/relation_resolution', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G' + } + + + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'resolvedGraphPath', + 'targetPath':'groupedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/grouping', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '7G' + } + + @@ -560,7 +639,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'groupedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml index e5ce3d710..0ea6be341 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml @@ -11,6 +11,16 @@ IIS 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_PROD/content + + + + + set blacklist of funder nsPrefixes @@ -61,11 +71,21 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_inference/graph/03_graph_resolved + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/prod_inference/graph/03_graph_dedup + /tmp/prod_inference/graph/04_graph_dedup @@ -75,7 +95,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/prod_inference/graph/04_graph_consistent + /tmp/prod_inference/graph/05_graph_consistent @@ -85,7 +105,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/prod_inference/graph/05_graph_cleaned + /tmp/prod_inference/graph/06_graph_cleaned @@ -347,6 +367,31 @@ build-report + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/prod_inference/working_dir/relation_resolution', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G' + } + + @@ -359,7 +404,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'resolvedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml index f83337b3c..73c44aba8 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -11,6 +11,16 @@ Data Provision 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_BETA/content + + + + + set blacklist of funder nsPrefixes from the beta aggregator @@ -71,11 +81,31 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_provision/graph/04_graph_resolved + + + + + + + Set the target path to store the GROUPED graph + + groupedGraphPath + /tmp/beta_provision/graph/05_graph_grouped + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/beta_provision/graph/04_graph_dedup + /tmp/beta_provision/graph/06_graph_dedup @@ -85,7 +115,7 @@ Set the target path to store the INFERRED graph inferredGraphPath - /tmp/beta_provision/graph/05_graph_inferred + /tmp/beta_provision/graph/07_graph_inferred @@ -95,7 +125,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/beta_provision/graph/06_graph_consistent + /tmp/beta_provision/graph/08_graph_consistent @@ -105,7 +135,7 @@ Set the target path to store the ORCID enriched graph orcidGraphPath - /tmp/beta_provision/graph/07_graph_orcid + /tmp/beta_provision/graph/09_graph_orcid @@ -115,7 +145,7 @@ Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/beta_provision/graph/08_graph_bulktagging + /tmp/beta_provision/graph/10_graph_bulktagging @@ -125,7 +155,17 @@ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/beta_provision/graph/09_graph_affiliation + /tmp/beta_provision/graph/11_graph_affiliation + + + + + + + Set the target path to store the AFFILIATION from SEMATIC RELATION graph + + affiliationSemRelGraphPath + /tmp/beta_provision/graph/12_graph_affiliationsr @@ -135,7 +175,7 @@ Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/beta_provision/graph/10_graph_comunity_organization + /tmp/beta_provision/graph/13_graph_comunity_organization @@ -145,7 +185,7 @@ Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - /tmp/beta_provision/graph/11_graph_funding + /tmp/beta_provision/graph/14_graph_funding @@ -155,7 +195,7 @@ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - /tmp/beta_provision/graph/12_graph_comunity_sem_rel + /tmp/beta_provision/graph/15_graph_comunity_sem_rel @@ -165,7 +205,7 @@ Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/beta_provision/graph/13_graph_country + /tmp/beta_provision/graph/16_graph_country @@ -175,7 +215,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/beta_provision/graph/14_graph_cleaned + /tmp/beta_provision/graph/17_graph_cleaned @@ -185,7 +225,7 @@ Set the target path to store the blacklisted graph blacklistedGraphPath - /tmp/beta_provision/graph/15_graph_blacklisted + /tmp/beta_provision/graph/18_graph_blacklisted @@ -695,6 +735,55 @@ build-report + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/relation_resolution', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G' + } + + + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'resolvedGraphPath', + 'targetPath':'groupedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/grouping', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '7G' + } + + @@ -707,7 +796,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'groupedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml index be6155f2f..205db29a6 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml @@ -11,6 +11,16 @@ Data Provision 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_PROD/content + + + + + set blacklist of funder nsPrefixes @@ -51,11 +61,21 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_inference/graph/03_graph_resolved + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/prod_provision/graph/03_graph_dedup + /tmp/prod_provision/graph/04_graph_dedup @@ -65,7 +85,7 @@ Set the target path to store the INFERRED graph inferredGraphPath - /tmp/prod_provision/graph/04_graph_inferred + /tmp/prod_provision/graph/05_graph_inferred @@ -75,7 +95,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/prod_provision/graph/05_graph_consistent + /tmp/prod_provision/graph/06_graph_consistent @@ -85,7 +105,7 @@ Set the target path to store the ORCID enriched graph orcidGraphPath - /tmp/prod_provision/graph/06_graph_orcid + /tmp/prod_provision/graph/07_graph_orcid @@ -95,7 +115,7 @@ Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/prod_provision/graph/07_graph_bulktagging + /tmp/prod_provision/graph/08_graph_bulktagging @@ -105,7 +125,7 @@ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/prod_provision/graph/08_graph_affiliation + /tmp/prod_provision/graph/09_graph_affiliation @@ -115,7 +135,7 @@ Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/prod_provision/graph/09_graph_comunity_organization + /tmp/prod_provision/graph/10_graph_comunity_organization @@ -125,7 +145,7 @@ Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - /tmp/prod_provision/graph/10_graph_funding + /tmp/prod_provision/graph/11_graph_funding @@ -135,7 +155,7 @@ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - /tmp/prod_provision/graph/11_graph_comunity_sem_rel + /tmp/prod_provision/graph/12_graph_comunity_sem_rel @@ -145,7 +165,7 @@ Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/prod_provision/graph/12_graph_country + /tmp/prod_provision/graph/13_graph_country @@ -155,7 +175,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/prod_provision/graph/13_graph_cleaned + /tmp/prod_provision/graph/14_graph_cleaned @@ -165,7 +185,7 @@ Set the target path to store the blacklisted graph blacklistedGraphPath - /tmp/prod_provision/graph/14_graph_blacklisted + /tmp/prod_provision/graph/15_graph_blacklisted @@ -446,6 +466,59 @@ build-report + + + + + + updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedFirstGraphPath' + } + + + { + 'resumeFrom' : 'prepareInfo', + 'hostedByMapPath' : '/user/dnet.production/data/hostedByMap', + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hostedbymap/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/hostedbymap', + 'outputPath' : '/tmp/prod_provision/working_dir/hostedbymap', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + Graph resolution + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/relation_resolution', + 'shouldResolveEntities' : 'false', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '9G' + } + + @@ -458,7 +531,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'resolvedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } From c42623f0066272088eef70a831ddceb9dbf365a0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 21 Jan 2022 14:30:09 +0100 Subject: [PATCH 152/176] added NPE checks --- .../dhp/schema/oaf/utils/OafMapperUtils.java | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index 2f1fc3a58..1ffc66dfd 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -48,8 +48,8 @@ public class OafMapperUtils { public static Result mergeResults(Result left, Result right) { - final boolean leftFromDeletedAuthority = isFromDeletedAuthority(left); - final boolean rightFromDeletedAuthority = isFromDeletedAuthority(right); + final boolean leftFromDeletedAuthority = isFromDelegatedAuthority(left); + final boolean rightFromDeletedAuthority = isFromDelegatedAuthority(right); if (leftFromDeletedAuthority && !rightFromDeletedAuthority) { return left; @@ -67,12 +67,16 @@ public class OafMapperUtils { } } - private static boolean isFromDeletedAuthority(Result r) { - return r - .getInstance() - .stream() - .map(i -> i.getCollectedfrom().getKey()) - .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId)); + private static boolean isFromDelegatedAuthority(Result r) { + return Optional + .ofNullable(r.getInstance()) + .map( + instance -> instance + .stream() + .filter(i -> Objects.nonNull(i.getCollectedfrom())) + .map(i -> i.getCollectedfrom().getKey()) + .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId))) + .orElse(false); } public static KeyValue keyValue(final String k, final String v) { From 97ad94d7d9187396507b26b2b124b143197e0d11 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 24 Jan 2022 18:02:07 +0100 Subject: [PATCH 153/176] [graph resolution] drop output path at the beginning --- .../dhp/oa/graph/resolution/oozie_app/workflow.xml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index 3cd08bc9b..74e792f07 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -19,12 +19,21 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + yarn From 4fc44edb71e8426040821fd919f0759a46d7cac3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 Jan 2022 16:03:57 +0100 Subject: [PATCH 154/176] depending on dhp-schemas:2.10.26 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 86c3b4526..a40942696 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.26-SNAPSHOT] + [2.10.26] [4.0.3] [6.0.5] [3.1.6] From af61e44accbd8b04b487892dc2ff03e2dfe269ae Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 Jan 2022 16:19:14 +0100 Subject: [PATCH 155/176] ported changes to the GraphCleaningFunctionsTest from 8de97883083bf2608a084840ab1f3e03f4244e05 --- .../dhp/oa/graph/clean/GraphCleaningFunctionsTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index a7c7eb810..7c39efb40 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -75,7 +75,7 @@ public class GraphCleaningFunctionsTest { } @Test - void testFilter_false() throws Exception { + void testFilter_invisible_true() throws Exception { assertNotNull(vocabularies); assertNotNull(mapping); @@ -87,11 +87,11 @@ public class GraphCleaningFunctionsTest { assertTrue(p_in instanceof Result); assertTrue(p_in instanceof Publication); - assertEquals(false, GraphCleaningFunctions.filter(p_in)); + assertEquals(true, GraphCleaningFunctions.filter(p_in)); } @Test - void testFilter_true() throws Exception { + void testFilter_true_nothing_to_filter() throws Exception { assertNotNull(vocabularies); assertNotNull(mapping); From db299dd8abc48255a7197c8fb07f6b9fdfe47ebb Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 Jan 2022 16:24:06 +0100 Subject: [PATCH 156/176] fixed typo --- .../eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index 1ffc66dfd..0a51e8600 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -48,13 +48,13 @@ public class OafMapperUtils { public static Result mergeResults(Result left, Result right) { - final boolean leftFromDeletedAuthority = isFromDelegatedAuthority(left); - final boolean rightFromDeletedAuthority = isFromDelegatedAuthority(right); + final boolean leftFromDelegatedAuthority = isFromDelegatedAuthority(left); + final boolean rightFromDelegatedAuthority = isFromDelegatedAuthority(right); - if (leftFromDeletedAuthority && !rightFromDeletedAuthority) { + if (leftFromDelegatedAuthority && !rightFromDelegatedAuthority) { return left; } - if (!leftFromDeletedAuthority && rightFromDeletedAuthority) { + if (!leftFromDelegatedAuthority && rightFromDelegatedAuthority) { return right; } From 8eb75ca16963286ca4585c0db5dcc96c4ac98ab5 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 Jan 2022 16:24:37 +0100 Subject: [PATCH 157/176] adapted GenerateEntitiesApplicationTest behaviour --- .../dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index 67490a470..53b3f8432 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -51,8 +51,8 @@ class GenerateEntitiesApplicationTest { Result software = getResult("odf_software.xml", Software.class); Result orp = getResult("oaf_orp.xml", OtherResearchProduct.class); - verifyMerge(publication, dataset, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); - verifyMerge(dataset, publication, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); + verifyMerge(publication, dataset, Dataset.class, ModelConstants.DATASET_RESULTTYPE_CLASSID); + verifyMerge(dataset, publication, Dataset.class, ModelConstants.DATASET_RESULTTYPE_CLASSID); verifyMerge(publication, software, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); verifyMerge(software, publication, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); From dce7f5fea8de2b34bb3ff6ce11ac6a0081eb0f29 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 31 Jan 2022 08:20:28 +0100 Subject: [PATCH 158/176] [BULK TAGGING] changed to fix issue that should have been fixed already --- .../dhp/bulktag/community/ResultTagger.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index c8b1bc8fe..1b625494a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -95,14 +95,14 @@ public class ResultTagger implements Serializable { } - result - .getInstance() - .stream() - .map(i -> new Pair<>(i.getCollectedfrom().getKey(), i.getHostedby().getKey())) - .flatMap(p -> Stream.of(p.getFst(), p.getSnd())) - .map(s -> StringUtils.substringAfter(s, "|")) - .collect(Collectors.toCollection(HashSet::new)) - .forEach( +// result +// .getInstance() +// .stream() +// .map(i -> new Pair<>(i.getCollectedfrom().getKey(), i.getHostedby().getKey())) +// .flatMap(p -> Stream.of(p.getFst(), p.getSnd())) +// .map(s -> StringUtils.substringAfter(s, "|")) +// .collect(Collectors.toCollection(HashSet::new)) + tmp.forEach( dsId -> datasources .addAll( conf.getCommunityForDatasource(dsId, param))); From 73eba34d425f7d23b8ae6fb260469bd8cdd61cb7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 1 Feb 2022 08:38:41 +0100 Subject: [PATCH 159/176] [UnresolvedEntities] Changed the way to merge the unresolved because the new merge removed the dataInfo from the merged result. Added also data info for subjects --- .../PrepareFOSSparkJob.java | 16 +++ .../PrepareSDGSparkJob.java | 15 +++ .../SparkSaveUnresolved.java | 15 ++- .../createunresolvedentities/PrepareTest.java | 111 ++++++++++-------- .../createunresolvedentities/ProduceTest.java | 3 + 5 files changed, 107 insertions(+), 53 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index fef796515..55e391932 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -21,8 +21,10 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; public class PrepareFOSSparkJob implements Serializable { @@ -71,6 +73,7 @@ public class PrepareFOSSparkJob implements Serializable { Result r = new Result(); FOSDataModel first = it.next(); r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI)); + HashSet level1 = new HashSet<>(); HashSet level2 = new HashSet<>(); HashSet level3 = new HashSet<>(); @@ -81,6 +84,19 @@ public class PrepareFOSSparkJob implements Serializable { level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); r.setSubject(sbjs); + r + .setDataInfo( + OafMapperUtils + .dataInfo( + false, null, true, + false, + OafMapperUtils + .qualifier( + ModelConstants.PROVENANCE_ENRICH, + null, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + null)); return r; }, Encoders.bean(Result.class)) .write() diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java index 27da77c0c..a31e380fe 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java @@ -21,8 +21,10 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; public class PrepareSDGSparkJob implements Serializable { @@ -78,6 +80,19 @@ public class PrepareSDGSparkJob implements Serializable { s -> sbjs .add(getSubject(s.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID))); r.setSubject(sbjs); + r + .setDataInfo( + OafMapperUtils + .dataInfo( + false, null, true, + false, + OafMapperUtils + .qualifier( + ModelConstants.PROVENANCE_ENRICH, + null, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + null)); return r; }, Encoders.bean(Result.class)) .write() diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java index ab8356836..3b9775094 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java @@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.actionmanager.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -67,7 +68,19 @@ public class SparkSaveUnresolved implements Serializable { .groupByKey((MapFunction) Result::getId, Encoders.STRING()) .mapGroups((MapGroupsFunction) (k, it) -> { Result ret = it.next(); - it.forEachRemaining(r -> ret.mergeFrom(r)); + it.forEachRemaining(r -> { + if (r.getInstance() != null) { + ret.setInstance(r.getInstance()); + } + if (r.getSubject() != null) { + if (ret.getSubject() != null) + ret.getSubject().addAll(r.getSubject()); + else + ret.setSubject(r.getSubject()); + } + + // ret.mergeFrom(r) + }); return ret; }, Encoders.bean(Result.class)) .write() diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index 166430c2f..cc8108bde 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -146,6 +146,11 @@ public class PrepareTest { .get(0) .getValue()); + final String doi2 = "unresolved::10.3390/s18072310::doi"; + + Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).count()); + Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).collect().get(0).getInstance().size()); + } @Test @@ -259,59 +264,61 @@ public class PrepareTest { .collect() .contains("8. Economic growth")); - } - - @Test - void test3() throws Exception { - final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_fos_results_20_12_2021.csv.gz"; - - final String outputPath = workingDir.toString() + "/fos.json"; - GetFOSSparkJob - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", sourcePath, - - "-outputPath", outputPath - - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(outputPath) - .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class)); - - tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); - tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null)); - tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null)); - tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null)); + Assertions.assertEquals(32, tmp.filter(row -> row.getDataInfo() != null).count()); } - @Test - void test4() throws Exception { - final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_sdg_results_20_12_21.csv.gz"; - - final String outputPath = workingDir.toString() + "/sdg.json"; - GetSDGSparkJob - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", sourcePath, - - "-outputPath", outputPath - - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(outputPath) - .map(item -> OBJECT_MAPPER.readValue(item, SDGDataModel.class)); - - tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); - tmp.foreach(t -> Assertions.assertTrue(t.getSbj() != null)); - - } +// @Test +// void test3() throws Exception { +// final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_fos_results_20_12_2021.csv.gz"; +// +// final String outputPath = workingDir.toString() + "/fos.json"; +// GetFOSSparkJob +// .main( +// new String[] { +// "--isSparkSessionManaged", Boolean.FALSE.toString(), +// "--sourcePath", sourcePath, +// +// "-outputPath", outputPath +// +// }); +// +// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); +// +// JavaRDD tmp = sc +// .textFile(outputPath) +// .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class)); +// +// tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); +// tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null)); +// tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null)); +// tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null)); +// +// } +// +// @Test +// void test4() throws Exception { +// final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_sdg_results_20_12_21.csv.gz"; +// +// final String outputPath = workingDir.toString() + "/sdg.json"; +// GetSDGSparkJob +// .main( +// new String[] { +// "--isSparkSessionManaged", Boolean.FALSE.toString(), +// "--sourcePath", sourcePath, +// +// "-outputPath", outputPath +// +// }); +// +// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); +// +// JavaRDD tmp = sc +// .textFile(outputPath) +// .map(item -> OBJECT_MAPPER.readValue(item, SDGDataModel.class)); +// +// tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); +// tmp.foreach(t -> Assertions.assertTrue(t.getSbj() != null)); +// +// } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index ce44f0036..a5ecaeabf 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -196,6 +196,9 @@ public class ProduceTest { final String doi = "unresolved::10.3390/s18072310::doi"; JavaRDD tmp = getResultJavaRDD(); + tmp + .filter(row -> row.getId().equals(doi)) + .foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); Assertions .assertEquals( 3, tmp From 37784209c98fbd8d9b6c309334da259419301bb1 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 2 Feb 2022 12:46:31 +0100 Subject: [PATCH 160/176] [dhp-schemas-] updated the version of dhp-schema to 2.10.27 for APC name and id modification --- .../java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java | 3 ++- pom.xml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index 1b625494a..71a2b3525 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -102,7 +102,8 @@ public class ResultTagger implements Serializable { // .flatMap(p -> Stream.of(p.getFst(), p.getSnd())) // .map(s -> StringUtils.substringAfter(s, "|")) // .collect(Collectors.toCollection(HashSet::new)) - tmp.forEach( + tmp + .forEach( dsId -> datasources .addAll( conf.getCommunityForDatasource(dsId, param))); diff --git a/pom.xml b/pom.xml index a40942696..489f712b0 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.26] + [2.10.27] [4.0.3] [6.0.5] [3.1.6] From 2e215abfa8e30a80e1bc198e474494439f8b5e0b Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 2 Feb 2022 17:27:44 +0100 Subject: [PATCH 161/176] test for instances with URLs for OpenAPC --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 15 +++++++ .../dnetlib/dhp/oa/graph/raw/oaf_openapc.xml | 45 +++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 6a3414b7c..6a5fd33b0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -814,6 +814,21 @@ class MappersTest { } } + @Test + void testOpenAPC() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_openapc.xml"))); + final List list = new OafToOafMapper(vocs, true, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Publication p = (Publication) list.get(0); + assertTrue(p.getInstance().size() > 0); + + assertEquals("https://doi.org/10.1155/2015/439379", p.getInstance().get(0).getUrl().get(0)); + } + private void assertValidId(final String id) { // System.out.println(id); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_openapc.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_openapc.xml index e69de29bb..4a326a21e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_openapc.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_openapc.xml @@ -0,0 +1,45 @@ + + + + openapc_____::000023f9cb6e3a247c764daec4273cbc + 10.1155/2015/439379 + 2022-02-01T15:26:33.817Z + openapc_____ + 2022-02-02T15:45:32.502Z + + + https://doi.org/10.1155/2015/439379 + 10.1155/2015/439379 + PMC4354964 + 25811027.0 + UCL + UCL + 1721.47 + BioMed Research International + http://creativecommons.org/licenses/by/3.0/ + 2015 + 0004 + OPEN + open access + + + + + + false + false + 0.9 + + + + + \ No newline at end of file From dbd2646d59f5bd7435be004656230ce1421234d1 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Thu, 3 Feb 2022 12:37:10 +0200 Subject: [PATCH 162/176] fixed the result_result creation for monitor --- .../graph/stats/oozie_app/scripts/step13.sql | 14 ++++++------- .../graph/stats/oozie_app/scripts/step14.sql | 10 +++++----- .../graph/stats/oozie_app/scripts/step15.sql | 8 ++++---- .../graph/stats/oozie_app/scripts/step2.sql | 20 +++++++++---------- .../scripts/step20-createMonitorDB.sql | 2 +- .../graph/stats/oozie_app/scripts/step3.sql | 20 +++++++++---------- .../graph/stats/oozie_app/scripts/step4.sql | 20 +++++++++---------- .../graph/stats/oozie_app/scripts/step5.sql | 20 +++++++++---------- .../graph/stats/oozie_app/scripts/step6.sql | 12 ++++++----- .../graph/stats/oozie_app/scripts/step7.sql | 2 +- .../graph/stats/oozie_app/scripts/step8.sql | 13 ++++++------ .../graph/stats/oozie_app/scripts/step9.sql | 2 +- 12 files changed, 73 insertions(+), 70 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql index 315d296fc..947c91072 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -14,7 +14,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource @@ -25,7 +25,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource @@ -36,7 +36,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource @@ -47,7 +47,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS SELECT * FROM ${stats_db_name}.publication_sources @@ -76,8 +76,8 @@ join ${openaire_db_name}.result r1 on rel.source=r1.id join ${openaire_db_name}.result r2 on r2.id=rel.target where reltype='resultResult' and r1.resulttype.classname!=r2.resulttype.classname - and r1.datainfo.deletedbyinference=false - and r2.datainfo.deletedbyinference=false + and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE + and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE and r1.resulttype.classname != 'other' and r2.resulttype.classname != 'other' - and rel.datainfo.deletedbyinference=false; \ No newline at end of file + and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql index 00a6913bc..e3a33a893 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql @@ -8,22 +8,22 @@ CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS SELECT * FROM ${stats_db_name}.publication_licenses @@ -46,7 +46,7 @@ FROM ( LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on o.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; -- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS; -- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS FOR COLUMNS; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql index 8e66e05c0..f46b65171 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -9,22 +9,22 @@ CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as select * from ${stats_db_name}.publication_refereed diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql index bb0d0ac6c..3d7a53a1c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql @@ -38,13 +38,13 @@ SELECT substr(p.id, 4) as id, case when size(p.description) > 0 then true else false end as abstract, 'publication' as type from ${openaire_db_name}.publication p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_classifications AS SELECT substr(p.id, 4) as id, instancetype.classname as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.instancetype) instances as instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_concepts AS SELECT substr(p.id, 4) as id, case @@ -53,45 +53,45 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept from ${openaire_db_name}.publication p LATERAL VIEW explode(p.context) contexts as context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_datasources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource FROM ( SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance - where p.datainfo.deletedbyinference = false) p + where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and p.datainfo.invisible=false) d on p.datasource = d.id; CREATE TABLE ${stats_db_name}.publication_languages AS select substr(p.id, 4) as id, p.language.classname as language FROM ${openaire_db_name}.publication p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.pid) pids AS ppid -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_topics as select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_citations AS SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.publication p lateral view explode(p.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and p.datainfo.deletedbyinference = false; \ No newline at end of file + and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index e72378f56..2528e904d 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -81,7 +81,7 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; -create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source or r.id=orig.target); +create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source) or exists (select 1 from TARGET.result r where r.id=orig.target); compute stats TARGET.result_result; -- datasources diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql index 953eaad6a..76a5e5a48 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql @@ -38,20 +38,20 @@ SELECT substr(d.id, 4) AS id, CASE WHEN SIZE(d.description) > 0 THEN TRUE ELSE FALSE end AS abstract, 'dataset' AS type FROM ${openaire_db_name}.dataset d -WHERE d.datainfo.deletedbyinference = FALSE; +WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_citations AS SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.dataset d LATERAL VIEW explode(d.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and d.datainfo.deletedbyinference = false; + and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_concepts AS SELECT substr(p.id, 4) as id, case @@ -60,7 +60,7 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.context) contexts as context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_datasources AS SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource @@ -68,31 +68,31 @@ FROM ( SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance) instances AS instance - where p.datainfo.deletedbyinference = false) p + where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN ( SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; CREATE TABLE ${stats_db_name}.dataset_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.dataset p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.pid) pids AS ppid -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql index 0210dc8cb..dc71f41f1 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql @@ -38,20 +38,20 @@ SELECT substr(s.id, 4) as id, CASE WHEN SIZE(s.description) > 0 THEN TRUE ELSE FALSE END AS abstract, 'software' as type from ${openaire_db_name}.software s -where s.datainfo.deletedbyinference = false; +where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_citations AS SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.software s LATERAL VIEW explode(s.extrainfo) citations as citation where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and s.datainfo.deletedbyinference = false; + and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_concepts AS SELECT substr(p.id, 4) as id, case @@ -60,7 +60,7 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.context) contexts AS context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource @@ -68,31 +68,31 @@ FROM ( SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance) instances AS instance - where p.datainfo.deletedbyinference = false) p + where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN ( SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; CREATE TABLE ${stats_db_name}.software_languages AS select substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.software p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.pid) pids AS ppid -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql index f7b302186..353aa98b7 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql @@ -37,19 +37,19 @@ SELECT substr(o.id, 4) AS id, CASE WHEN SIZE(o.description) > 0 THEN TRUE ELSE FALSE END AS abstract, 'other' AS type FROM ${openaire_db_name}.otherresearchproduct o -WHERE o.datainfo.deletedbyinference = FALSE; +WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false; -- Otherresearchproduct_citations CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and o.datainfo.deletedbyinference = false; + and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS SELECT substr(p.id, 4) as id, case @@ -57,33 +57,33 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance - where p.datainfo.deletedbyinference = false) p + where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN(SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.otherresearchproduct p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index 378e0f17b..b5e2eb37b 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -5,24 +5,26 @@ ------------------------------------------------------ CREATE TABLE ${stats_db_name}.project_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid -FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids; +FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids +where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false; + CREATE TABLE ${stats_db_name}.project_organizations AS SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization from ${openaire_db_name}.relation r WHERE r.reltype = 'projectOrganization' - and r.datainfo.deletedbyinference = false; + and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.project_results AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultProject' - and r.datainfo.deletedbyinference = false; + and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; create table ${stats_db_name}.project_classification as select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3 from ${openaire_db_name}.project p lateral view explode(p.h2020classification) classifs as class -where p.datainfo.deletedbyinference=false and class.h2020programme is not null; +where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null; CREATE TABLE ${stats_db_name}.project_tmp ( @@ -72,7 +74,7 @@ SELECT substr(p.id, 4) AS id, p.code.value AS code, p.totalcost AS totalcost FROM ${openaire_db_name}.project p -WHERE p.datainfo.deletedbyinference = false; +WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; create table ${stats_db_name}.funder as select distinct xpath_string(fund, '//funder/id') as id, diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index b3cbc9b41..a57966abf 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -127,7 +127,7 @@ CREATE TABLE ${stats_db_name}.result_organization AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultOrganization' - and r.datainfo.deletedbyinference = false; + and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.result_projects AS select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index 76d31eb5e..d3935fd4a 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -44,7 +44,7 @@ FROM ${openaire_db_name}.datasource d1 LATERAL VIEW EXPLODE(originalid) temp AS originalidd WHERE originalidd like "piwik:%") AS d2 ON d1.id = d2.id -WHERE d1.datainfo.deletedbyinference = FALSE; +WHERE d1.datainfo.deletedbyinference = FALSE and d1.datainfo.invisible=false; -- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table. -- Creating a temporary dual table that will be removed after the following insert @@ -82,24 +82,25 @@ WHERE yearofvalidation = '-1'; CREATE TABLE ${stats_db_name}.datasource_languages AS SELECT substr(d.id, 4) AS id, langs.languages AS language -FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages; +FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages +where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.datasource_oids AS SELECT substr(d.id, 4) AS id, oids.ids AS oid -FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids; +FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids +where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.datasource_organizations AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r -WHERE r.reltype = 'datasourceOrganization' - and r.datainfo.deletedbyinference = false; +WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; -- datasource sources: -- where the datasource info have been collected from. create table if not exists ${stats_db_name}.datasource_sources AS select substr(d.id, 4) as id, substr(cf.key, 4) as datasource from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf -where d.datainfo.deletedbyinference = false; +where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS SELECT datasource AS id, id AS result diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql index a1cb46185..c73aa811c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql @@ -9,7 +9,7 @@ SELECT substr(o.id, 4) as id, o.legalshortname.value as legalshortname, o.country.classid as country FROM ${openaire_db_name}.organization o -WHERE o.datainfo.deletedbyinference = FALSE; +WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible = FALSE; CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS SELECT organization AS id, id AS datasource From c2b44530a36f332e335a6a45190b4290285bdf73 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Thu, 3 Feb 2022 13:44:07 +0200 Subject: [PATCH 163/176] typo... --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql index 3d7a53a1c..468a42045 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql @@ -64,7 +64,7 @@ FROM ( LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false and p.datainfo.invisible=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; CREATE TABLE ${stats_db_name}.publication_languages AS select substr(p.id, 4) as id, p.language.classname as language From aae667e6b60281adb0932e7c4beec8d10b076a1c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 4 Feb 2022 12:34:25 +0100 Subject: [PATCH 164/176] [APC at the result level] added the APC at the level of the result and modified test class --- .../dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java | 4 ++++ .../test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java | 6 ++++++ pom.xml | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 526f45f6e..3e74bc5e0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -347,6 +347,10 @@ public abstract class AbstractMdRecordToOafMapper { r.setCoverage(prepareCoverages(doc, info)); r.setContext(prepareContexts(doc, info)); r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES + r + .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); + r + .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); r.setInstance(instances); r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances)); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 6a5fd33b0..bdb73abf5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -827,6 +827,12 @@ class MappersTest { assertTrue(p.getInstance().size() > 0); assertEquals("https://doi.org/10.1155/2015/439379", p.getInstance().get(0).getUrl().get(0)); + + assertTrue(p.getProcessingchargeamount() != null); + assertTrue(p.getProcessingchargecurrency() != null); + + assertEquals("1721.47", p.getProcessingchargeamount().getValue()); + assertEquals("EUR", p.getProcessingchargecurrency().getValue()); } private void assertValidId(final String id) { diff --git a/pom.xml b/pom.xml index 489f712b0..a99162d26 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.27] + [2.10.28-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From ae633c566b9938729faf0cdd4638e69ac4bc958f Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Fri, 4 Feb 2022 15:04:19 +0200 Subject: [PATCH 165/176] fixed the result_result table --- .../stats/oozie_app/scripts/step20-createMonitorDB.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 2528e904d..4e5f7a373 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -81,7 +81,11 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; -create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source) or exists (select 1 from TARGET.result r where r.id=orig.target); +create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); +create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); +create table TARGET.result_result as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; +drop view TARGET.foo1; +drop view TARGET.foo2; compute stats TARGET.result_result; -- datasources From 9fd2ef468e25cb7140fee450693cce796b254bd4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 4 Feb 2022 16:40:32 +0100 Subject: [PATCH 166/176] [APC at the result level] changed dependecy in external pom --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a99162d26..f3da49580 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.28-SNAPSHOT] + [2.10.28] [4.0.3] [6.0.5] [3.1.6] From 5f762cbd09163ff6a8ab4ade7300410ba6bb8dee Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Mon, 7 Feb 2022 12:09:12 +0200 Subject: [PATCH 167/176] fixed yet another typo --- .../oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 4e5f7a373..9e4edb44a 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -130,7 +130,7 @@ compute stats TARGET.indi_result_has_cc_licence; create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_has_cc_licence_url; -create view TARGET.indi_funder_country_collab stored as select * from SOURCE.indi_funder_country_collab; +create view TARGET.indi_funder_country_collab stored as parquet as select * from SOURCE.indi_funder_country_collab; create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_with_orcid; From fbc28ee8c35a4c30d4cc97d01557ab569cf907f4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 7 Feb 2022 18:32:08 +0100 Subject: [PATCH 168/176] [OpenCitation] change the integration logic to consider dois with commas inside --- .../CreateActionSetSparkJob.java | 33 ++++-- .../actionmanager/opencitations/ReadCOCI.java | 111 ++++++++++++++++++ .../opencitations/model/COCI.java | 89 ++++++++++++++ .../input_readcoci_parameters.json | 36 ++++++ .../opencitations/oozie_app/workflow.xml | 27 +++++ .../CreateOpenCitationsASTest.java | 22 ++-- .../opencitations/ReadCOCITest.java | 94 +++++++++++++++ .../opencitations/COCI/input1/_SUCCESS | 0 ...b-77f8-4059-91c0-5521309823f8-c000.json.gz | Bin 0 -> 346 bytes .../opencitations/COCI/input2/_SUCCESS | 0 ...6-f472-40fa-985a-a4f3c74f9b53-c000.json.gz | Bin 0 -> 306 bytes .../opencitations/COCI/input3/_SUCCESS | 0 ...c-5b5c-4c65-92b7-7a6928da5cdb-c000.json.gz | Bin 0 -> 316 bytes .../opencitations/COCI/input4/_SUCCESS | 0 ...b-f97d-449d-bd08-04a9b935bfd2-c000.json.gz | Bin 0 -> 137 bytes .../opencitations/inputFiles/input4 | 2 + .../raw/AbstractMdRecordToOafMapper.java | 4 +- dhp-workflows/pom.xml | 2 +- 18 files changed, 394 insertions(+), 26 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/_SUCCESS create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/part-00000-b05c4abb-77f8-4059-91c0-5521309823f8-c000.json.gz create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/_SUCCESS create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/part-00000-6831e1e6-f472-40fa-985a-a4f3c74f9b53-c000.json.gz create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/_SUCCESS create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/part-00000-2ec9f31c-5b5c-4c65-92b7-7a6928da5cdb-c000.json.gz create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/_SUCCESS create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/part-00000-2ba3f17b-f97d-449d-bd08-04a9b935bfd2-c000.json.gz create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4 diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index ea5fea96f..c16f8eeea 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -14,6 +14,7 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; @@ -21,6 +22,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; @@ -83,10 +85,16 @@ public class CreateActionSetSparkJob implements Serializable { private static void extractContent(SparkSession spark, String inputPath, String outputPath, boolean shouldDuplicateRels) { spark - .sqlContext() - .createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING()) + .read() + .textFile(inputPath + "/*") + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, COCI.class), + Encoders.bean(COCI.class)) +// spark +// .sqlContext() +// .createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING()) .flatMap( - (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels).iterator(), + (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels).iterator(), Encoders.bean(Relation.class)) .filter((FilterFunction) value -> value != null) .toJavaRDD() @@ -98,15 +106,14 @@ public class CreateActionSetSparkJob implements Serializable { } - private static List createRelation(String value, boolean duplicate) { - String[] line = value.split(","); - if (!line[1].startsWith("10.")) { - return new ArrayList<>(); - } + private static List createRelation(COCI value, boolean duplicate) { + List relationList = new ArrayList<>(); - String citing = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[1])); - final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[2])); + String citing = ID_PREFIX + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCiting())); + final String cited = ID_PREFIX + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited())); relationList .addAll( @@ -114,9 +121,11 @@ public class CreateActionSetSparkJob implements Serializable { citing, cited)); - if (duplicate && line[1].endsWith(".refs")) { + if (duplicate && value.getCiting().endsWith(".refs")) { citing = ID_PREFIX + IdentifierFactory - .md5(CleaningFunctions.normalizePidValue("doi", line[1].substring(0, line[1].indexOf(".refs")))); + .md5( + CleaningFunctions + .normalizePidValue("doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs")))); relationList.addAll(getRelations(citing, cited)); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java new file mode 100644 index 000000000..7ac1f2de3 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java @@ -0,0 +1,111 @@ + +package eu.dnetlib.dhp.actionmanager.opencitations; + +import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER; +import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.IOException; +import java.io.Serializable; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class ReadCOCI implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(ReadCOCI.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + ReadCOCI.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + final String hdfsNameNode = parser.get("nameNode"); + log.info("nameNode: {}", hdfsNameNode); + + final String inputPath = parser.get("sourcePath"); + log.info("input path : {}", inputPath); + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + SparkConf sconf = new SparkConf(); + + final String delimiter = Optional + .ofNullable(parser.get("delimiter")) + .orElse(DEFAULT_DELIMITER); + + runWithSparkSession( + sconf, + isSparkSessionManaged, + spark -> { + doRead( + spark, + fileSystem, + inputPath, + outputPath, + delimiter); + }); + } + + public static void doRead(SparkSession spark, FileSystem fileSystem, String inputPath, String outputPath, + String delimiter) throws IOException { + + RemoteIterator iterator = fileSystem + .listFiles( + new Path(inputPath), true); + + while (iterator.hasNext()) { + LocatedFileStatus fileStatus = iterator.next(); + + Path p = fileStatus.getPath(); + String p_string = p.toString(); + Dataset cociData = spark + .read() + .format("csv") + .option("sep", delimiter) + .option("inferSchema", "true") + .option("header", "true") + .option("quotes", "\"") + .load(p_string); + + cociData.map((MapFunction) row -> { + COCI coci = new COCI(); + coci.setOci(row.getString(0)); + coci.setCiting(row.getString(1)); + coci.setCited(row.getString(2)); + return coci; + }, Encoders.bean(COCI.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/" + p_string.substring(p_string.lastIndexOf("/") + 1)); + } + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java new file mode 100644 index 000000000..a7b3330ea --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java @@ -0,0 +1,89 @@ + +package eu.dnetlib.dhp.actionmanager.opencitations.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByPosition; + +public class COCI implements Serializable { + @CsvBindByPosition(position = 0) +// @CsvBindByName(column = "doi") + private String oci; + + @CsvBindByPosition(position = 1) +// @CsvBindByName(column = "level1") + private String citing; + + @CsvBindByPosition(position = 2) +// @CsvBindByName(column = "level2") + private String cited; + + @CsvBindByPosition(position = 3) +// @CsvBindByName(column = "level3") + private String creation; + + @CsvBindByPosition(position = 4) + private String timespan; + + @CsvBindByPosition(position = 5) + private String journal_sc; + + @CsvBindByPosition(position = 6) + private String author_sc; + + public String getOci() { + return oci; + } + + public void setOci(String oci) { + this.oci = oci; + } + + public String getCiting() { + return citing; + } + + public void setCiting(String citing) { + this.citing = citing; + } + + public String getCited() { + return cited; + } + + public void setCited(String cited) { + this.cited = cited; + } + + public String getCreation() { + return creation; + } + + public void setCreation(String creation) { + this.creation = creation; + } + + public String getTimespan() { + return timespan; + } + + public void setTimespan(String timespan) { + this.timespan = timespan; + } + + public String getJournal_sc() { + return journal_sc; + } + + public void setJournal_sc(String journal_sc) { + this.journal_sc = journal_sc; + } + + public String getAuthor_sc() { + return author_sc; + } + + public void setAuthor_sc(String author_sc) { + this.author_sc = author_sc; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json new file mode 100644 index 000000000..14c20f762 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json @@ -0,0 +1,36 @@ +[ + { + "paramName": "sp", + "paramLongName": "sourcePath", + "paramDescription": "the zipped opencitations file", + "paramRequired": true + }, + + { + "paramName": "nn", + "paramLongName": "nameNode", + "paramDescription": "the hdfs name node", + "paramRequired": true + }, + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the hdfs name node", + "paramRequired": false + }, + { + "paramName": "d", + "paramLongName": "delimiter", + "paramDescription": "the hdfs name node", + "paramRequired": false + }, + { + "paramName": "op", + "paramLongName": "outputPath", + "paramDescription": "the hdfs name node", + "paramRequired": false + } +] + + + diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml index d052791a3..7276d2d3e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml @@ -26,6 +26,7 @@ ${wf:conf('resumeFrom') eq 'DownloadDump'} ${wf:conf('resumeFrom') eq 'ExtractContent'} + ${wf:conf('resumeFrom') eq 'ReadContent'} @@ -60,6 +61,32 @@ --inputFile${inputFile} --workingPath${workingPath} + + + + + + + yarn + cluster + Produces the AS for OC + eu.dnetlib.dhp.actionmanager.opencitations.ReadCOCI + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingPath}/COCI + --outputPath${workingDir}/COCI + --nameNode${nameNode} + --delimiter${delimiter} + diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java index 5153c412f..3e4ce750e 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java @@ -76,7 +76,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -99,7 +99,7 @@ public class CreateOpenCitationsASTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); - assertEquals(60, tmp.count()); + assertEquals(62, tmp.count()); // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); @@ -110,7 +110,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -131,7 +131,7 @@ public class CreateOpenCitationsASTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); - assertEquals(44, tmp.count()); + assertEquals(46, tmp.count()); // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); @@ -142,7 +142,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -175,7 +175,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -215,7 +215,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -240,8 +240,8 @@ public class CreateOpenCitationsASTest { assertEquals("citation", r.getSubRelType()); assertEquals("resultResult", r.getRelType()); }); - assertEquals(22, tmp.filter(r -> r.getRelClass().equals("Cites")).count()); - assertEquals(22, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count()); + assertEquals(23, tmp.filter(r -> r.getRelClass().equals("Cites")).count()); + assertEquals(23, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count()); } @@ -250,7 +250,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -295,7 +295,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java new file mode 100644 index 000000000..e1b9c4d23 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java @@ -0,0 +1,94 @@ + +package eu.dnetlib.dhp.actionmanager.opencitations; + +import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI; +import eu.dnetlib.dhp.schema.oaf.Dataset; + +public class ReadCOCITest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(ReadCOCITest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(ReadCOCITest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(ReadCOCITest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(ReadCOCITest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void testReadCOCI() throws Exception { + String inputPath = getClass() + .getResource( + "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + .getPath(); + + ReadCOCI + .doRead( + spark, FileSystem.getLocal(new Configuration()), inputPath, + workingDir.toString() + "/COCI", DEFAULT_DELIMITER); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/COCI/*/") + .map(item -> OBJECT_MAPPER.readValue(item, COCI.class)); + + Assertions.assertEquals(23, tmp.count()); + + Assertions.assertEquals(1, tmp.filter(c -> c.getCiting().equals("10.1207/s15327647jcd3,4-01")).count()); + + Assertions.assertEquals(8, tmp.filter(c -> c.getCiting().indexOf(".refs") > -1).count()); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/part-00000-b05c4abb-77f8-4059-91c0-5521309823f8-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/part-00000-b05c4abb-77f8-4059-91c0-5521309823f8-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..c55dcd71c6171ba9cf8c61bb0d51852c8c5104e8 GIT binary patch literal 346 zcmV-g0j2&QiwFP!000000L7EdYQr!PgztR}pIWGt{gGsC-^RG1bs;GvITS+Qy|YRP z2}vluxE9E3X=HypjQ8C*?Ut|IOUD!$*Wx2`&K$(JL?Nn?Bw_yQo?SYv-;P?Mez9e$ zIR)YzwA2(_^f^vY5RO8EtJfm6)s-@(qU1Xnccp?gtf+>Az6~vG+PemWp%XGZ6|6~n zV?{-%)1g-Fz904%eO$1mAdJc8XyCp+wMO_@*)=j3SU)Z|!)jXZ;5=(2i&SRfc7vPo zES#ig65%Q;cC?*Io9))-a;t+gF0x?88q$B74hpxrrAJYvo2C=ltN;K2 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/part-00000-6831e1e6-f472-40fa-985a-a4f3c74f9b53-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/part-00000-6831e1e6-f472-40fa-985a-a4f3c74f9b53-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae7886e8c57b098106a3c52959788b361cadc034 GIT binary patch literal 306 zcmV-20nPp&iwFP!000000KJq=PQx$|h4&mIE08dG^J9;P>y!%0f=UDlq>8&U<5Hvo z-NuRHjNcpkiyiwgZFXS%ZH}Z=hfkyk=neFG%cMfUxRMK z)=*>}SV9~A=srf((E7;a+EcBLV`*^<(Ff%U{DaK%^L}$z&1@}FmPXD>EEXA-*Ke*$uJhA& zSqgK}s#apRtuU1}+o1tyq}(|b2FFuF(~*1Jf0w6Hs;+b%oKK|;!aN7Q0BvPIpR5D` E0R117`Tzg` literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/part-00000-2ec9f31c-5b5c-4c65-92b7-7a6928da5cdb-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/part-00000-2ec9f31c-5b5c-4c65-92b7-7a6928da5cdb-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..8374019193373d40ded63283b49baf2bedbc8271 GIT binary patch literal 316 zcmV-C0mJ?uiwFP!000000L@cDPQ)+_yz`8nX*#x>xZyj?g319T7SKv7{?0gEAwWgq z)TKSNNyhfrGdXp;`DK6G-E@MKP0N505cBRjPUgq^s!t+P&7rF4d4G2x>S;F< z0%a89Q7fo$OkBrXVO()PUUPLBCF|5jMCO^IFal8)k%Q>&{p$3U7mLd9kc9mDnsH1F z^#qvIblimKf)Fb$sVu~cF(#9SMWL3{&D+-^pSjs;D{qI_Bx<>C%iTmLL*uW+XVjnU z76Vv?)^T$K#54m{F|P9($=c-t#?;1+bNhoa#V9TD|H_!WRcF~3cdi+es5w#sIM1pojL%bM;0 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/part-00000-2ba3f17b-f97d-449d-bd08-04a9b935bfd2-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/part-00000-2ba3f17b-f97d-449d-bd08-04a9b935bfd2-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..0436b10ffee3c5aa7b224d814fd1b437aed14221 GIT binary patch literal 137 zcmb2|=3syT(>_N&CPNO_?{bS@OV&=A7PbEJOrKXy;=7FR#lF&Dae7euDSqMC=et_G z1^Sw%TxM2fzRJCMRic#7{#<*FM-x(dj|T1B=DOTO+3#+2c?*Y;l&kK%+H?Odx=M5^ og)Y*lQ<`a|`gxtiR_%m0?d<>T_9e~}<$Zsc{V1()); // NOT PRESENT IN MDSTORES r - .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); + .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); r - .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); + .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); r.setInstance(instances); r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances)); diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 53d029467..143178560 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -44,7 +44,7 @@ iis-releases iis releases plugin repository - http://maven.ceon.pl/artifactory/iis-releases + https://maven.ceon.pl/artifactory/iis-releases default From b071f8e4154d6100db1acfeff9f6ba34efabc21d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 8 Feb 2022 15:37:28 +0100 Subject: [PATCH 169/176] [OpenCitation] change to extract in json format each folder just onece --- .../actionmanager/opencitations/ReadCOCI.java | 34 +++++--------- .../input_readcoci_parameters.json | 19 ++++---- .../opencitations/oozie_app/workflow.xml | 8 ++-- .../opencitations/ReadCOCITest.java | 47 +++++++++++++++++-- 4 files changed, 70 insertions(+), 38 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java index 7ac1f2de3..fd83f7072 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java @@ -41,18 +41,14 @@ public class ReadCOCI implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - final String hdfsNameNode = parser.get("nameNode"); - log.info("nameNode: {}", hdfsNameNode); - - final String inputPath = parser.get("sourcePath"); - log.info("input path : {}", inputPath); + final String[] inputFile = parser.get("inputFile").split(";"); + log.info("inputFile {}", inputFile.toString()); Boolean isSparkSessionManaged = isSparkSessionManaged(parser); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); + final String workingPath = parser.get("workingPath"); + log.info("workingPath {}", workingPath); - FileSystem fileSystem = FileSystem.get(conf); SparkConf sconf = new SparkConf(); final String delimiter = Optional @@ -65,25 +61,20 @@ public class ReadCOCI implements Serializable { spark -> { doRead( spark, - fileSystem, - inputPath, + workingPath, + inputFile, outputPath, delimiter); }); } - public static void doRead(SparkSession spark, FileSystem fileSystem, String inputPath, String outputPath, + private static void doRead(SparkSession spark, String workingPath, String[] inputFiles, + String outputPath, String delimiter) throws IOException { - RemoteIterator iterator = fileSystem - .listFiles( - new Path(inputPath), true); + for(String inputFile : inputFiles){ + String p_string = workingPath + "/" + inputFile ; - while (iterator.hasNext()) { - LocatedFileStatus fileStatus = iterator.next(); - - Path p = fileStatus.getPath(); - String p_string = p.toString(); Dataset cociData = spark .read() .format("csv") @@ -91,7 +82,8 @@ public class ReadCOCI implements Serializable { .option("inferSchema", "true") .option("header", "true") .option("quotes", "\"") - .load(p_string); + .load(p_string) + .repartition(100); cociData.map((MapFunction) row -> { COCI coci = new COCI(); @@ -103,7 +95,7 @@ public class ReadCOCI implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + "/" + p_string.substring(p_string.lastIndexOf("/") + 1)); + .json(outputPath + inputFile); } } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json index 14c20f762..b57cb5d9a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json @@ -1,17 +1,12 @@ [ { - "paramName": "sp", - "paramLongName": "sourcePath", + "paramName": "wp", + "paramLongName": "workingPath", "paramDescription": "the zipped opencitations file", "paramRequired": true }, - { - "paramName": "nn", - "paramLongName": "nameNode", - "paramDescription": "the hdfs name node", - "paramRequired": true - }, + { "paramName": "issm", "paramLongName": "isSparkSessionManaged", @@ -28,7 +23,13 @@ "paramName": "op", "paramLongName": "outputPath", "paramDescription": "the hdfs name node", - "paramRequired": false + "paramRequired": true + }, + { + "paramName": "if", + "paramLongName": "inputFile", + "paramDescription": "the hdfs name node", + "paramRequired": true } ] diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml index 7276d2d3e..aee2559ee 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml @@ -82,10 +82,10 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --sourcePath${workingPath}/COCI - --outputPath${workingDir}/COCI - --nameNode${nameNode} + --workingPath${workingPath}/COCI + --outputPath${workingPath}/COCI_JSON --delimiter${delimiter} + --inputFile${inputFileCoci} @@ -108,7 +108,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --inputPath${workingPath}/COCI + --inputPath${workingPath}/COCI_JSON --outputPath${outputPath} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java index e1b9c4d23..27627f9f6 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java @@ -10,6 +10,7 @@ import java.nio.file.Path; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -73,15 +74,53 @@ public class ReadCOCITest { "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") .getPath(); + LocalFileSystem fs = FileSystem.getLocal(new Configuration()); + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1")); + + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2")); + + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3")); + + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4")); + ReadCOCI - .doRead( - spark, FileSystem.getLocal(new Configuration()), inputPath, - workingDir.toString() + "/COCI", DEFAULT_DELIMITER); + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-workingPath", + workingDir.toString() + "/COCI", + "-outputPath", + workingDir.toString() + "/COCI_json/", + "-inputFile", "input1;input2;input3;input4" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/COCI/*/") + .textFile(workingDir.toString() + "/COCI_json/*/") .map(item -> OBJECT_MAPPER.readValue(item, COCI.class)); Assertions.assertEquals(23, tmp.count()); From 759ed519f251e976ce34571bd351713bb6c429a8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 8 Feb 2022 16:15:34 +0100 Subject: [PATCH 170/176] [OpenCitation] added logic to avoid the genration of self citations relations --- .../CreateActionSetSparkJob.java | 24 ++++++++++-------- .../opencitations/ReadCOCITest.java | 11 ++++++-- .../opencitations/COCI/input5/_SUCCESS | 0 ...e-90e3-4791-821a-b84636bc13e2-c000.json.gz | Bin 0 -> 20 bytes ...e-90e3-4791-821a-b84636bc13e2-c000.json.gz | Bin 0 -> 108 bytes .../opencitations/inputFiles/input5 | 2 ++ 6 files changed, 24 insertions(+), 13 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/_SUCCESS create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00000-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00061-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5 diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index c16f8eeea..4051bc6f0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -115,18 +115,20 @@ public class CreateActionSetSparkJob implements Serializable { final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited())); - relationList - .addAll( - getRelations( - citing, - cited)); + if(!citing.equals(cited)){ + relationList + .addAll( + getRelations( + citing, + cited)); - if (duplicate && value.getCiting().endsWith(".refs")) { - citing = ID_PREFIX + IdentifierFactory - .md5( - CleaningFunctions - .normalizePidValue("doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs")))); - relationList.addAll(getRelations(citing, cited)); + if (duplicate && value.getCiting().endsWith(".refs")) { + citing = ID_PREFIX + IdentifierFactory + .md5( + CleaningFunctions + .normalizePidValue("doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs")))); + relationList.addAll(getRelations(citing, cited)); + } } return relationList; diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java index 27627f9f6..53af074e1 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java @@ -103,6 +103,13 @@ public class ReadCOCITest { .getPath()), new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4")); + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5")); + ReadCOCI .main( new String[] { @@ -112,7 +119,7 @@ public class ReadCOCITest { workingDir.toString() + "/COCI", "-outputPath", workingDir.toString() + "/COCI_json/", - "-inputFile", "input1;input2;input3;input4" + "-inputFile", "input1;input2;input3;input4;input5" }); @@ -123,7 +130,7 @@ public class ReadCOCITest { .textFile(workingDir.toString() + "/COCI_json/*/") .map(item -> OBJECT_MAPPER.readValue(item, COCI.class)); - Assertions.assertEquals(23, tmp.count()); + Assertions.assertEquals(24, tmp.count()); Assertions.assertEquals(1, tmp.filter(c -> c.getCiting().equals("10.1207/s15327647jcd3,4-01")).count()); diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00000-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00000-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00061-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00061-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..12968af39e578a2c9afae321fee5a918e8ad8ee1 GIT binary patch literal 108 zcmb2|=3sz;si$`HHW=`*T$pDVBW!W}XT%y|0WW4&5s!|$hB^%e#n Date: Tue, 8 Feb 2022 16:23:05 +0100 Subject: [PATCH 171/176] [OpenCitation] refactoring --- .../actionmanager/opencitations/CreateActionSetSparkJob.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index 4051bc6f0..f230a7fd7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -90,9 +90,6 @@ public class CreateActionSetSparkJob implements Serializable { .map( (MapFunction) value -> OBJECT_MAPPER.readValue(value, COCI.class), Encoders.bean(COCI.class)) -// spark -// .sqlContext() -// .createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING()) .flatMap( (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels).iterator(), Encoders.bean(Relation.class)) From 600ede1798beb34c282dba65593e229385d908a1 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Fri, 11 Feb 2022 11:00:20 +0100 Subject: [PATCH 172/176] serialisation of APCs int he XML records --- .../raw/AbstractMdRecordToOafMapper.java | 4 +-- .../oa/provision/utils/XmlRecordFactory.java | 10 ++++++ .../oa/provision/XmlRecordFactoryTest.java | 3 ++ .../dnetlib/dhp/oa/provision/publication.json | 34 ++++++++++++++++++- 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 3e74bc5e0..3e8ca1763 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -348,9 +348,9 @@ public abstract class AbstractMdRecordToOafMapper { r.setContext(prepareContexts(doc, info)); r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES r - .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); + .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); r - .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); + .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); r.setInstance(instances); r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances)); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 9a3f2188b..e51a84e02 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -398,6 +398,16 @@ public class XmlRecordFactory implements Serializable { if (r.getResourcetype() != null) { metadata.add(XmlSerializationUtils.mapQualifier("resourcetype", r.getResourcetype())); } + if (r.getProcessingchargeamount() != null) { + metadata + .add( + XmlSerializationUtils + .asXmlElement("processingchargeamount", r.getProcessingchargeamount().getValue())); + metadata + .add( + XmlSerializationUtils + .asXmlElement("processingchargecurrency", r.getProcessingchargecurrency().getValue())); + } } switch (type) { diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index a4b6182bc..c1e0567e6 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -66,6 +66,9 @@ public class XmlRecordFactoryTest { assertEquals("10.5689/LIB.2018.2853550", doc.valueOf("//instance/alternateidentifier/text()")); assertEquals(3, doc.selectNodes("//instance").size()); + + assertEquals("1721.47", doc.valueOf("//processingchargeamount/text()")); + assertEquals("EUR", doc.valueOf("//processingchargecurrency/text()")); } @Test diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json index d5aa13ed6..3b5b54bbf 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json @@ -1655,5 +1655,37 @@ }, "value": "Understanding Electromigration in Cu-CNT Composite Interconnects A Multiscale Electrothermal Simulation Study" } - ] + ], + "processingchargeamount": { + "value": "1721.47", + "dataInfo": { + "invisible": true, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "processingchargecurrency": { + "value": "EUR", + "dataInfo": { + "invisible": true, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } } \ No newline at end of file From 615817033410a288709c47a310c35572e5b59d05 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Fri, 11 Feb 2022 18:05:18 +0100 Subject: [PATCH 173/176] testing delegated authority and bumped dep to schemas --- .../schema/oaf/utils/OafMapperUtilsTest.java | 16 ++ .../dhp/schema/oaf/utils/dataset_2.json | 141 +++++++++++++++++- .../schema/oaf/utils/dataset_delegated.json | 140 +++++++++++++++++ pom.xml | 2 +- 4 files changed, 297 insertions(+), 2 deletions(-) create mode 100644 dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_delegated.json diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index 8804469fa..79629a171 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -185,6 +185,22 @@ class OafMapperUtilsTest { .getClassid()); } + @Test + void testDelegatedAuthority() throws IOException { + Dataset d1 = read("dataset_2.json", Dataset.class); + Dataset d2 = read("dataset_delegated.json", Dataset.class); + + assertEquals(1, d2.getCollectedfrom().size()); + assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID)); + + Result res = OafMapperUtils.mergeResults(d1, d2); + + assertEquals(d2, res); + + System.out.println(OBJECT_MAPPER.writeValueAsString(res)); + + } + protected HashSet cfId(List collectedfrom) { return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new)); } diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json index 52e4e126a..c880edb7d 100644 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json @@ -1 +1,140 @@ -{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]} \ No newline at end of file +{ + "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", + "resuttype": {"classid": "dataset"}, + "pid": [ + { + "qualifier": {"classid": "doi"}, + "value": "10.1016/j.cmet.2011.03.013" + }, + { + "qualifier": {"classid": "urn"}, + "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" + }, + { + "qualifier": {"classid": "scp-number"}, + "value": "79953761260" + }, + { + "qualifier": {"classid": "pmc"}, + "value": "21459329" + } + ], + "collectedfrom": [ + { + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", + "value": "Repository B" + } + ], + "instance": [ + { + "refereed": { + "classid": "0000", + "classname": "UNKNOWN", + "schemeid": "dnet:review_levels", + "schemename": "dnet:review_levels" + }, + "hostedby": { + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", + "value": "Zenodo" + }, + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "processingchargecurrency": { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "value": "EUR" + }, + "pid": [ + { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "Digital Object Identifier", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1371/journal.pone.0085605" + } + ], + "distributionlocation": "", + "url": ["https://doi.org/10.1371/journal.pone.0085605"], + "alternateIdentifier": [ + { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "qualifier": { + "classid": "pmid", + "classname": "PubMed ID", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "24454899.0" + } + ], + "collectedfrom": { + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", + "value": "Repository B" + }, + "processingchargeamount": { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "value": "1022.02" + }, + "instancetype": { + "classid": "0004", + "classname": "Conference object", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + } + } + ] +} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_delegated.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_delegated.json new file mode 100644 index 000000000..967c1181b --- /dev/null +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_delegated.json @@ -0,0 +1,140 @@ +{ + "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", + "resuttype": {"classid": "dataset"}, + "pid": [ + { + "qualifier": {"classid": "doi"}, + "value": "10.1016/j.cmet.2011.03.013" + }, + { + "qualifier": {"classid": "urn"}, + "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" + }, + { + "qualifier": {"classid": "scp-number"}, + "value": "79953761260" + }, + { + "qualifier": {"classid": "pmc"}, + "value": "21459329" + } + ], + "collectedfrom": [ + { + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", + "value": "Zenodo" + } + ], + "instance": [ + { + "refereed": { + "classid": "0000", + "classname": "UNKNOWN", + "schemeid": "dnet:review_levels", + "schemename": "dnet:review_levels" + }, + "hostedby": { + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", + "value": "Zenodo" + }, + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "processingchargecurrency": { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "value": "EUR" + }, + "pid": [ + { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "Digital Object Identifier", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1371/journal.pone.0085605" + } + ], + "distributionlocation": "", + "url": ["https://doi.org/10.1371/journal.pone.0085605"], + "alternateIdentifier": [ + { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "qualifier": { + "classid": "pmid", + "classname": "PubMed ID", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "24454899.0" + } + ], + "collectedfrom": { + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", + "value": "Zenodo" + }, + "processingchargeamount": { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "value": "1022.02" + }, + "instancetype": { + "classid": "0004", + "classname": "Conference object", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + } + } + ] +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index f3da49580..603a4cf1e 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.28] + [2.10.29] [4.0.3] [6.0.5] [3.1.6] From 1490867cc7886746b5b9925b9bd5a3ebd7499cb5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 14 Feb 2022 14:52:12 +0100 Subject: [PATCH 174/176] [OpenCitation] cleaning of the COCI model --- .../opencitations/model/COCI.java | 50 +------------------ 1 file changed, 1 insertion(+), 49 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java index a7b3330ea..bad4a5a3b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java @@ -6,30 +6,12 @@ import java.io.Serializable; import com.opencsv.bean.CsvBindByPosition; public class COCI implements Serializable { - @CsvBindByPosition(position = 0) -// @CsvBindByName(column = "doi") private String oci; - @CsvBindByPosition(position = 1) -// @CsvBindByName(column = "level1") private String citing; - @CsvBindByPosition(position = 2) -// @CsvBindByName(column = "level2") private String cited; - @CsvBindByPosition(position = 3) -// @CsvBindByName(column = "level3") - private String creation; - - @CsvBindByPosition(position = 4) - private String timespan; - - @CsvBindByPosition(position = 5) - private String journal_sc; - - @CsvBindByPosition(position = 6) - private String author_sc; public String getOci() { return oci; @@ -55,35 +37,5 @@ public class COCI implements Serializable { this.cited = cited; } - public String getCreation() { - return creation; - } - - public void setCreation(String creation) { - this.creation = creation; - } - - public String getTimespan() { - return timespan; - } - - public void setTimespan(String timespan) { - this.timespan = timespan; - } - - public String getJournal_sc() { - return journal_sc; - } - - public void setJournal_sc(String journal_sc) { - this.journal_sc = journal_sc; - } - - public String getAuthor_sc() { - return author_sc; - } - - public void setAuthor_sc(String author_sc) { - this.author_sc = author_sc; - } + } From be64055cfe212bcfc945da6fa53c76e15f977b8b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 14 Feb 2022 15:49:44 +0100 Subject: [PATCH 175/176] [OpenCitation] changed the name of destination folders --- .../actionmanager/opencitations/ReadCOCI.java | 2 +- .../opencitations/model/COCI.java | 2 +- .../opencitations/ReadCOCITest.java | 20 +++++++++--------- .../opencitations/inputFiles/input1 | 8 ------- .../opencitations/inputFiles/input1.gz | Bin 0 -> 422 bytes .../opencitations/inputFiles/input2 | 8 ------- .../opencitations/inputFiles/input2.gz | Bin 0 -> 378 bytes .../opencitations/inputFiles/input3 | 9 -------- .../opencitations/inputFiles/input3.gz | Bin 0 -> 395 bytes .../opencitations/inputFiles/input4 | 2 -- .../opencitations/inputFiles/input4.gz | Bin 0 -> 179 bytes .../opencitations/inputFiles/input5 | 2 -- .../opencitations/inputFiles/input5.gz | Bin 0 -> 150 bytes 13 files changed, 12 insertions(+), 41 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java index fd83f7072..3fd6e8e9a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java @@ -73,7 +73,7 @@ public class ReadCOCI implements Serializable { String delimiter) throws IOException { for(String inputFile : inputFiles){ - String p_string = workingPath + "/" + inputFile ; + String p_string = workingPath + "/" + inputFile + ".gz"; Dataset cociData = spark .read() diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java index bad4a5a3b..ce087ce22 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java @@ -37,5 +37,5 @@ public class COCI implements Serializable { this.cited = cited; } - + } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java index 53af074e1..629e95c8e 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java @@ -78,37 +78,37 @@ public class ReadCOCITest { fs .copyFromLocalFile( false, new org.apache.hadoop.fs.Path(getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1") + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz") .getPath()), - new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1")); + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1.gz")); fs .copyFromLocalFile( false, new org.apache.hadoop.fs.Path(getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2") + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz") .getPath()), - new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2")); + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2.gz")); fs .copyFromLocalFile( false, new org.apache.hadoop.fs.Path(getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3") + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz") .getPath()), - new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3")); + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3.gz")); fs .copyFromLocalFile( false, new org.apache.hadoop.fs.Path(getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4") + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz") .getPath()), - new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4")); + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4.gz")); fs .copyFromLocalFile( false, new org.apache.hadoop.fs.Path(getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5") + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz") .getPath()), - new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5")); + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5.gz")); ReadCOCI .main( diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 deleted file mode 100644 index d93d6fd99..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 +++ /dev/null @@ -1,8 +0,0 @@ -oci,citing,cited,creation,timespan,journal_sc,author_sc -02001000007362801000805046300010563030608046333-0200101010136193701050501630209010637020000083700020400083733,10.1007/s10854-015-3684-x,10.1111/j.1551-2916.2008.02408.x,2015-09-01,P7Y2M,no,no -02001000007362801000805046300010563030608046333-02001000007362801000805046300010463020101046309,10.1007/s10854-015-3684-x,10.1007/s10854-014-2114-9,2015-09-01,P1Y2M4D,yes,no -02001000007362801000805046300010563030608046333-020010001063619371214271022182329370200010337000937000609,10.1007/s10854-015-3684-x,10.1016/j.ceramint.2013.09.069,2015-09-01,P1Y6M,no,no -02001000007362801000805046300010563030608046333-02001000007362801000805046300000963090901036304,10.1007/s10854-015-3684-x,10.1007/s10854-009-9913-4,2015-09-01,P6Y3M10D,yes,no -02001000007362801000805046300010563030608046333-02001000106360000030863010009085807025909000307006305,10.1007/s10854-015-3684-x,10.1016/0038-1098(72)90370-5,2015-09-01,P43Y8M,no,no -02001000007362801000805046300010563030608056309-02001000106361937281010370200010437000937000308,10.1007/s10854-015-3685-9,10.1016/j.saa.2014.09.038,2015-09-03,P0Y7M,no,no -02001000007362801000805046300010563030608056309-0200100010636193722102912171027370200010537000437000106,10.1007/s10854-015-3685-9,10.1016/j.matchar.2015.04.016,2015-09-03,P0Y2M,no,no \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a734f4e201d4520ede681fcda66aa75e2fb8706 GIT binary patch literal 422 zcmV;X0a^YZiwFqDY6@Zi18Ht>b#yTRwUk>=tuPRU-~A<;v^bZxwC}!}_`07=!y$?} z8w{`${r9eRtH-S|UQbKPFf+i{#R^k5m~L2y@v5BqKc<`eybjZ7)?wQ(w|O)-({3K~ z-^-;ldAD9qGom9BBBnB04H9b{$&(98rQ&f`3dkX&YWMwhYT%@8>A__52_;+w zxZb80o>+1&c5u}W8vn7Xxu8w^d#C~vC)l}|xqU|V6?PAf-cKQK^V(hKxnTQ-K+fS6 Q_S4_u0fkiL0VM?h0048#m;e9( literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 deleted file mode 100644 index 14ee8b354..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 +++ /dev/null @@ -1,8 +0,0 @@ -oci,citing,cited,creation,timespan,journal_sc,author_sc -02001000308362804010509076300010963000003086301-0200100020936020001003227000009010004,10.1038/s41597-019-0038-1,10.1029/2010wr009104,2019-04-15,P8Y1M,no,no -02001000308362804010509076300010963000003086301-0200100010636280103060463080105025800015900000006006303,10.1038/s41597-019-0038-1,10.1016/s1364-8152(01)00060-3,2019-04-15,P17Y3M,no,no -02001000308362804010509076300010963000003086301-02001000007362800000407076300010063000401066333,10.1038/s41597-019-0038-1,10.1007/s00477-010-0416-x,2019-04-15,P8Y9M6D,no,no -02001000308362804010509076300010963000003086301-02001000007362800000700046300010363000905016308,10.1038/s41597-019-0038-1,10.1007/s00704-013-0951-8,2019-04-15,P5Y9M23D,no,no -02001000308362804010509076300010963000003086301-02001000002361924123705070707,10.1038/s41597-019-0038-1,10.1002/joc.5777,2019-04-15,P0Y8M1D,no,no -02001000308362804010509076300010963000003086301-02005010904361714282863020263040504076302000108,10.1038/s41597-019-0038-1,10.5194/hess-22-4547-2018,2019-04-15,P0Y7M18D,no,no -02001000308362804010509076300010963000003086301-02001000002361924123703050404,10.1038/s41597-019-0038-1,10.1002/joc.3544,2019-04-15,P6Y9M6D,no,no \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz new file mode 100644 index 0000000000000000000000000000000000000000..b30aa073cc3b8c0515bd3cf766e184a805a7705b GIT binary patch literal 378 zcmV-=0fqh_iwFqDY6@Zi18Ht>b#yWSt&>ep!!Qhn@B9*wn$=&4lRMW+Tz5iKnOG6D zsO+Ms?Enb@a%MhqkNQZtnW$9JWtYJ8m~~H}uuqy=|xWO<%nX`?24= zK2B}5+0VOSwABp|00IC*LISNsrtd@zu6E%@f!jJ5YuAGBpPgy7Vxg5IzGWhUGt^f09yQc2b#yZTy;DtZgD?!e@3(~D4~WTq*Gju`nh@(YrVS>b zQ-A*Kw5mogMQt^LlrKN$`Pm-Y&b8g#^=~$9UtBwGo4Ff$H+Ns#>AUIO$FLjw=JR=K zU9+3thf!A#KmZ8#5eJAMQ0d-A1lFCN+h8HV0!ykgLzH?)5EKDWBs7|}L-6X1@+cJ| zMiw9oK}z=|mWn2kvCDJ*Gb z^T)z`fwCT9{^3!nXR(C6S2yAnkJ0a!F;HFIDF#NW7*ZUh_9etMEPtk*N8&OrXcaZW zhT`K*%>Jg^G&QRT#j1331d7E{Dyi`{sh0IeiI&Q>uKrY0!6hNCb#ycUHBCtl!Y~ZH^NgNd7-uDqK;p^?RcQsOAVF>V ze;ifyZjZ+=JN@6AjSRG0rRpFDqNEyfc1|qbAdXJ8BqM hFgaomx^qF3d{I5iJElAP2j3_cu|J_U8=~j{004P1OThpD literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5 deleted file mode 100644 index 0a1cd5aca..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5 +++ /dev/null @@ -1,2 +0,0 @@ -oci,citing,cited,creation,timespan,journal_sc,author_sc -02001000007362801000805046300010563030608046333-02001000007362801000805046300010563030608046333,10.1007/s10854-015-3684-x,10.1007/s10854-015-3684-x,2015-09-01,P7Y2M,no,no \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e7a6c85b1bd6f4478ad720806217cd4aa2b8779 GIT binary patch literal 150 zcmV;H0BQdpiwFqDY6@Zi18Ht>b#yfVm5NIWLNO2o_nu{k`2l0cnMH!HD;dMoq?^6&?La9jG?`4-Q%=bK(F1 E0PH42g8%>k literal 0 HcmV?d00001 From 3aa2020b24abd65b2bcdc9bb6c96d2babb359c1e Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 15 Feb 2022 11:05:27 +0100 Subject: [PATCH 176/176] added script to regenerate hostedBy Map following instruction defined on ticket #7539 updated hosted By Map --- .../dhp/datacite/create_updated_hb_map.py | 63 + .../eu/dnetlib/dhp/datacite/hostedBy_map.json | 1065 ++++++++++++++++- 2 files changed, 1103 insertions(+), 25 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/create_updated_hb_map.py diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/create_updated_hb_map.py b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/create_updated_hb_map.py new file mode 100644 index 000000000..db0431aae --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/create_updated_hb_map.py @@ -0,0 +1,63 @@ +from urllib.request import urlopen +import json + + +def retrieve_datacite_clients(base_url): + datacite_clients = {} + while base_url is not None: + with urlopen(base_url) as response: + print(f"requesting {base_url}") + response_content = response.read() + data = json.loads(response_content) + if 'data' in data and len(data['data'])>0: + for item in data['data']: + datacite_clients[item['id'].lower()]= item['attributes']['re3data'].lower().replace("https://doi.org/","") + base_url = data['links']['next'] + else: + base_url = None + return datacite_clients + + +def retrieve_r3data(start_url): + r3data_clients = {} + page_number = 1 + base_url = start_url + while base_url is not None: + with urlopen(base_url) as response: + print(f"requesting {base_url}") + response_content = response.read() + data = json.loads(response_content) + if 'data' in data and len(data['data'])>0: + for item in data['data']: + r3data_clients[item['id'].lower()]= dict( + openaire_id= "re3data_____::"+item['attributes']['re3dataId'].lower(), + official_name=item['attributes']['repositoryName'] + ) + page_number +=1 + base_url = f"{start_url}&page[number]={page_number}" + else: + base_url = None + return r3data_clients + + + + + + +base_url ="https://api.datacite.org/clients?query=re3data_id:*&page[size]=250" + +dc = retrieve_datacite_clients(base_url) +r3 = retrieve_r3data("https://api.datacite.org/re3data?page[size]=250") + +result = {} + +for item in dc: + res = dc[item].lower() + if res not in r3: + print(f"missing {res} for {item} in dictionary") + else: + result[item.upper()]= dict(openaire_id=r3[res]["openaire_id"],datacite_name=r3[res]["official_name"], official_name=r3[res]["official_name"] ) + + +with open('hostedBy_map.json', 'w', encoding='utf8') as json_file: + json.dump(result, json_file, ensure_ascii=False, indent=1) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json index 5aa50d6a8..947a9a255 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json @@ -1,27 +1,1042 @@ { - "CERN.ZENODO": { - "openaire_id": "opendoar____::2659", - "datacite_name": "Zenodo", - "official_name": "ZENODO" - }, - "TIB.PANGAEA": { - "openaire_id": "re3data_____::r3d100010134", - "datacite_name": "PANGAEA", - "official_name": "PANGAEA" - }, - "DRYAD.DRYAD": { - "openaire_id": "re3data_____::r3d100000044", - "datacite_name": "DRYAD", - "official_name": "DRYAD" - }, - "FIGSHARE.UCT": { - "openaire_id": "re3data_____::r3d100012633", - "datacite_name": "University of Cape Town (UCT)", - "official_name": "ZivaHub" - }, - "CSIC.DIGITAL": { - "openaire_id": "re3data_____::r3d100011076", - "datacite_name": "Digital CSIC", - "official_name": "DIGITAL.CSIC" - } + "GESIS.RKI": { + "openaire_id": "re3data_____::r3d100010436", + "datacite_name": "Forschungsdatenzentrum am Robert Koch Institut", + "official_name": "Forschungsdatenzentrum am Robert Koch Institut" + }, + "DELFT.DATA4TU": { + "openaire_id": "re3data_____::r3d100010216", + "datacite_name": "4TU.ResearchData | science.engineering.design", + "official_name": "4TU.ResearchData | science.engineering.design" + }, + "FBTK.DMXFNR": { + "openaire_id": "re3data_____::r3d100013444", + "datacite_name": "Aperta TÜBİTAK Open Archive", + "official_name": "Aperta TÜBİTAK Open Archive" + }, + "BL.CAM": { + "openaire_id": "re3data_____::r3d100010620", + "datacite_name": "Apollo", + "official_name": "Apollo" + }, + "NU.ARCH": { + "openaire_id": "re3data_____::r3d100012925", + "datacite_name": "Arch", + "official_name": "Arch" + }, + "BL.ADS": { + "openaire_id": "re3data_____::r3d100000006", + "datacite_name": "Archaeology Data Service", + "official_name": "Archaeology Data Service" + }, + "FZJ.B2SHARE": { + "openaire_id": "re3data_____::r3d100011394", + "datacite_name": "B2SHARE", + "official_name": "B2SHARE" + }, + "STSCI.MAST": { + "openaire_id": "re3data_____::r3d100010403", + "datacite_name": "Barbara A. Mikulski Archive for Space Telescopes", + "official_name": "Barbara A. Mikulski Archive for Space Telescopes" + }, + "CBG.DATASETS": { + "openaire_id": "re3data_____::r3d100010927", + "datacite_name": "Barcode of Life Data Systems", + "official_name": "Barcode of Life Data Systems" + }, + "TIB.BEILST": { + "openaire_id": "re3data_____::r3d100012329", + "datacite_name": "STRENDA DB", + "official_name": "STRENDA DB" + }, + "MLBS.SKUXGS": { + "openaire_id": "re3data_____::r3d100011696", + "datacite_name": "biodiversity.aq", + "official_name": "biodiversity.aq" + }, + "BL.BIRKBECK": { + "openaire_id": "re3data_____::r3d100012185", + "datacite_name": "Birkbeck Research Data", + "official_name": "Birkbeck Research Data" + }, + "SND.BOLIN": { + "openaire_id": "re3data_____::r3d100011699", + "datacite_name": "Bolin Centre Database", + "official_name": "Bolin Centre Database" + }, + "BROWN.BDR": { + "openaire_id": "re3data_____::r3d100011654", + "datacite_name": "Brown Digital Repository", + "official_name": "Brown Digital Repository" + }, + "BL.BRUNEL": { + "openaire_id": "re3data_____::r3d100012140", + "datacite_name": "Brunel figshare", + "official_name": "Brunel figshare" + }, + "TIB.BAFG": { + "openaire_id": "re3data_____::r3d100011664", + "datacite_name": "Geoportal der BFG", + "official_name": "Geoportal der BFG" + }, + "TIND.CALTECH": { + "openaire_id": "re3data_____::r3d100012384", + "datacite_name": "CaltechDATA", + "official_name": "CaltechDATA" + }, + "CUL.CIESIN": { + "openaire_id": "re3data_____::r3d100010207", + "datacite_name": "Center for International Earth Science Information Network", + "official_name": "Center for International Earth Science Information Network" + }, + "TIB.KIT-IOC": { + "openaire_id": "re3data_____::r3d100010748", + "datacite_name": "chemotion", + "official_name": "chemotion" + }, + "CORNELL.CISER": { + "openaire_id": "re3data_____::r3d100011056", + "datacite_name": "CISER Data & Reproduction Archive", + "official_name": "CISER Data & Reproduction Archive" + }, + "CLARIN.CLARIN": { + "openaire_id": "re3data_____::r3d100010209", + "datacite_name": "CLARIN-ERIC", + "official_name": "CLARIN-ERIC" + }, + "OCEAN.OCEAN": { + "openaire_id": "re3data_____::r3d100012369", + "datacite_name": "Code Ocean", + "official_name": "Code Ocean" + }, + "CORNELL.LIBRARY": { + "openaire_id": "re3data_____::r3d100012322", + "datacite_name": "eCommons - Cornell's digital repository", + "official_name": "eCommons - Cornell's digital repository" + }, + "BL.CRAN": { + "openaire_id": "re3data_____::r3d100012068", + "datacite_name": "Cranfield Online Research Data", + "official_name": "Cranfield Online Research Data" + }, + "DARTLIB.CRAWDAD": { + "openaire_id": "re3data_____::r3d100010716", + "datacite_name": "CRAWDAD", + "official_name": "CRAWDAD" + }, + "GESIS.CSDA": { + "openaire_id": "re3data_____::r3d100010484", + "datacite_name": "Czech Social Science Data Archive", + "official_name": "Czech Social Science Data Archive" + }, + "PSU.DATACOM": { + "openaire_id": "re3data_____::r3d100012927", + "datacite_name": "Data Commons", + "official_name": "Data Commons" + }, + "INIST.INRA": { + "openaire_id": "re3data_____::r3d100012673", + "datacite_name": "Data INRAE", + "official_name": "Data INRAE" + }, + "UMN.DRUM": { + "openaire_id": "re3data_____::r3d100011393", + "datacite_name": "Data Repository for the University of Minnesota", + "official_name": "Data Repository for the University of Minnesota" + }, + "ESTDOI.REPO": { + "openaire_id": "re3data_____::r3d100012333", + "datacite_name": "DataDOI", + "official_name": "DataDOI" + }, + "DAFI.CLIENT": { + "openaire_id": "re3data_____::r3d100010217", + "datacite_name": "DataFirst", + "official_name": "DataFirst" + }, + "UNM.DATAONE": { + "openaire_id": "re3data_____::r3d100000045", + "datacite_name": "DataONE", + "official_name": "DataONE" + }, + "FCT.UMINHO": { + "openaire_id": "re3data_____::r3d100013173", + "datacite_name": "DataRepositoriUM", + "official_name": "DataRepositoriUM" + }, + "FIGSHARE.IASTATE": { + "openaire_id": "re3data_____::r3d100012696", + "datacite_name": "DataShare: the Open Data Repository of Iowa State University", + "official_name": "DataShare: the Open Data Repository of Iowa State University" + }, + "PU.DATASPACE": { + "openaire_id": "re3data_____::r3d100012513", + "datacite_name": "DataSpace", + "official_name": "DataSpace" + }, + "DANS.DATAVERSENL": { + "openaire_id": "re3data_____::r3d100011201", + "datacite_name": "DataverseNL", + "official_name": "DataverseNL" + }, + "BIBSYS.UIT-ORD": { + "openaire_id": "re3data_____::r3d100012538", + "datacite_name": "DataverseNO", + "official_name": "DataverseNO" + }, + "GESIS.SSRI": { + "openaire_id": "re3data_____::r3d100013494", + "datacite_name": "DATICE", + "official_name": "DATICE" + }, + "SML.TDAR": { + "openaire_id": "re3data_____::r3d100010347", + "datacite_name": "tDAR", + "official_name": "tDAR" + }, + "CSIC.DIGITAL": { + "openaire_id": "re3data_____::r3d100011076", + "datacite_name": "DIGITAL.CSIC", + "official_name": "DIGITAL.CSIC" + }, + "BL.DRI": { + "openaire_id": "re3data_____::r3d100011805", + "datacite_name": "Digital Repository of Ireland", + "official_name": "Digital Repository of Ireland" + }, + "SUBGOE.DARIAH": { + "openaire_id": "re3data_____::r3d100011345", + "datacite_name": "DARIAH-DE Repository", + "official_name": "DARIAH-DE Repository" + }, + "DRYAD.DRYAD": { + "openaire_id": "re3data_____::r3d100000044", + "datacite_name": "DRYAD", + "official_name": "DRYAD" + }, + "ZBMED.DSMZ": { + "openaire_id": "re3data_____::r3d100010219", + "datacite_name": "DSMZ", + "official_name": "DSMZ" + }, + "DKRZ.ESGF": { + "openaire_id": "re3data_____::r3d100011159", + "datacite_name": "Earth System Grid Federation", + "official_name": "Earth System Grid Federation" + }, + "KTSW.AEZVVV": { + "openaire_id": "re3data_____::r3d100013469", + "datacite_name": "EarthEnv", + "official_name": "EarthEnv" + }, + "DANS.ARCHIVE": { + "openaire_id": "re3data_____::r3d100010214", + "datacite_name": "EASY", + "official_name": "EASY" + }, + "ETHZ.WSL": { + "openaire_id": "re3data_____::r3d100012587", + "datacite_name": "EnviDat", + "official_name": "EnviDat" + }, + "ETHZ.E-COLL": { + "openaire_id": "re3data_____::r3d100012557", + "datacite_name": "ETH Zürich Research Collection", + "official_name": "ETH Zürich Research Collection" + }, + "ETHZ.DA-RD": { + "openaire_id": "re3data_____::r3d100011626", + "datacite_name": "ETH Data Archive", + "official_name": "ETH Data Archive" + }, + "BL.ECMWF": { + "openaire_id": "re3data_____::r3d100011726", + "datacite_name": "European Centre for Medium-Range Weather Forecasts", + "official_name": "European Centre for Medium-Range Weather Forecasts" + }, + "CARL.FRDR": { + "openaire_id": "re3data_____::r3d100012646", + "datacite_name": "Federated Research Data Repository", + "official_name": "Federated Research Data Repository" + }, + "FIGSHARE.ARS": { + "openaire_id": "re3data_____::r3d100010066", + "datacite_name": "figshare", + "official_name": "figshare" + }, + "TIB.FLOSS": { + "openaire_id": "re3data_____::r3d100010863", + "datacite_name": "FLOSSmole", + "official_name": "FLOSSmole" + }, + "LXKC.DSKYFI": { + "openaire_id": "re3data_____::r3d100010976", + "datacite_name": "ForestPlots.net", + "official_name": "ForestPlots.net" + }, + "YKDK.ZUYSQI": { + "openaire_id": "re3data_____::r3d100010368", + "datacite_name": "FORS DARIS", + "official_name": "FORS DARIS" + }, + "TIB.LUIS": { + "openaire_id": "re3data_____::r3d100012825", + "datacite_name": "Forschungsdaten-Repositorium der LUH", + "official_name": "Forschungsdaten-Repositorium der LUH" + }, + "GESIS.BIBB-FDZ": { + "openaire_id": "re3data_____::r3d100010190", + "datacite_name": "Forschungsdatenzentrum im Bundesinstitut für Berufsbildung", + "official_name": "Forschungsdatenzentrum im Bundesinstitut für Berufsbildung" + }, + "GESIS.ZPID": { + "openaire_id": "re3data_____::r3d100010328", + "datacite_name": "PsychData", + "official_name": "PsychData" + }, + "TIB.GFZ": { + "openaire_id": "re3data_____::r3d100012335", + "datacite_name": "GFZ Data Services", + "official_name": "GFZ Data Services" + }, + "CNGB.GIGADB": { + "openaire_id": "re3data_____::r3d100010478", + "datacite_name": "GigaDB", + "official_name": "GigaDB" + }, + "GBIF.GBIF": { + "openaire_id": "re3data_____::r3d100000039", + "datacite_name": "Global Biodiversity Information Facility", + "official_name": "Global Biodiversity Information Facility" + }, + "ARDCX.GRIFFITH": { + "openaire_id": "re3data_____::r3d100010864", + "datacite_name": "Griffith University Research Data Repository", + "official_name": "Griffith University Research Data Repository" + }, + "GDCC.HARVARD-SBGR": { + "openaire_id": "re3data_____::r3d100011601", + "datacite_name": "Structural Biology Data Grid", + "official_name": "Structural Biology Data Grid" + }, + "GDCC.HARVARD-DV": { + "openaire_id": "re3data_____::r3d100010051", + "datacite_name": "Harvard Dataverse", + "official_name": "Harvard Dataverse" + }, + "CERN.HEPDATA": { + "openaire_id": "re3data_____::r3d100010081", + "datacite_name": "HEPData", + "official_name": "HEPData" + }, + "SND.ICOS": { + "openaire_id": "re3data_____::r3d100012203", + "datacite_name": "ICOS Carbon Portal", + "official_name": "ICOS Carbon Portal" + }, + "GESIS.ICPSR": { + "openaire_id": "re3data_____::r3d100010255", + "datacite_name": "Inter-university Consortium for Political and Social Research", + "official_name": "Inter-university Consortium for Political and Social Research" + }, + "IEEE.DATAPORT": { + "openaire_id": "re3data_____::r3d100012569", + "datacite_name": "IEEE DataPort", + "official_name": "IEEE DataPort" + }, + "IIASA.DARE": { + "openaire_id": "re3data_____::r3d100012932", + "datacite_name": "IIASA DARE", + "official_name": "IIASA DARE" + }, + "ILLINOIS.DATABANK": { + "openaire_id": "re3data_____::r3d100012001", + "datacite_name": "Illinois Data Bank", + "official_name": "Illinois Data Bank" + }, + "IRIS.IRIS": { + "openaire_id": "re3data_____::r3d100010268", + "datacite_name": "Incorporated Research Institutions for Seismology", + "official_name": "Incorporated Research Institutions for Seismology" + }, + "GESIS.INDEPTH": { + "openaire_id": "re3data_____::r3d100011392", + "datacite_name": "INDEPTH Data Repository", + "official_name": "INDEPTH Data Repository" + }, + "JCVI.GXPWAQ": { + "openaire_id": "re3data_____::r3d100011558", + "datacite_name": "Influenza Research Database", + "official_name": "Influenza Research Database" + }, + "TIB.INP": { + "openaire_id": "re3data_____::r3d100013120", + "datacite_name": "INPTDAT", + "official_name": "INPTDAT" + }, + "CERN.INSPIRE": { + "openaire_id": "re3data_____::r3d100011077", + "datacite_name": "Inspire-HEP", + "official_name": "Inspire-HEP" + }, + "INIST.IFREMER": { + "openaire_id": "re3data_____::r3d100012965", + "datacite_name": "IFREMER-SISMER Portail de données marines", + "official_name": "IFREMER-SISMER Portail de données marines" + }, + "INIST.ILL": { + "openaire_id": "re3data_____::r3d100012072", + "datacite_name": "ILL Data Portal", + "official_name": "ILL Data Portal" + }, + "TIB.KIT-IMK": { + "openaire_id": "re3data_____::r3d100011956", + "datacite_name": "CARIBIC", + "official_name": "CARIBIC" + }, + "WWPX.INTR2": { + "openaire_id": "re3data_____::r3d100012347", + "datacite_name": "²Dok[§]", + "official_name": "²Dok[§]" + }, + "BL.IITA": { + "openaire_id": "re3data_____::r3d100012883", + "datacite_name": "International Institute of Tropical Agriculture datasets", + "official_name": "International Institute of Tropical Agriculture datasets" + }, + "TIB.IPK": { + "openaire_id": "re3data_____::r3d100011647", + "datacite_name": "IPK Gatersleben", + "official_name": "IPK Gatersleben" + }, + "IST.REX": { + "openaire_id": "re3data_____::r3d100012394", + "datacite_name": "IST Austria Research Explorer", + "official_name": "IST Austria Research Explorer" + }, + "GDCC.JHU": { + "openaire_id": "re3data_____::r3d100011836", + "datacite_name": "Johns Hopkins Data Archive Dataverse Network", + "official_name": "Johns Hopkins Data Archive Dataverse Network" + }, + "KAGGLE.KAGGLE": { + "openaire_id": "re3data_____::r3d100012705", + "datacite_name": "Kaggle", + "official_name": "Kaggle" + }, + "ESTDOI.KEEL": { + "openaire_id": "re3data_____::r3d100011941", + "datacite_name": "Center of Estonian Language Resources", + "official_name": "Center of Estonian Language Resources" + }, + "RICE.KINDER": { + "openaire_id": "re3data_____::r3d100012884", + "datacite_name": "Kinder Institute Urban Data Platform", + "official_name": "Kinder Institute Urban Data Platform" + }, + "DELFT.KNMI": { + "openaire_id": "re3data_____::r3d100011879", + "datacite_name": "KNMI Data Platform", + "official_name": "KNMI Data Platform" + }, + "LANDCARE.SOILS": { + "openaire_id": "re3data_____::r3d100010835", + "datacite_name": "Land Resource Information Systems Portal", + "official_name": "Land Resource Information Systems Portal" + }, + "LANDCARE.GENERAL": { + "openaire_id": "re3data_____::r3d100011662", + "datacite_name": "Landcare Research Data Repository", + "official_name": "Landcare Research Data Repository" + }, + "TIB.LDEO": { + "openaire_id": "re3data_____::r3d100012547", + "datacite_name": "Lamont-Doherty Core Repository", + "official_name": "Lamont-Doherty Core Repository" + }, + "ZBMED.LERNZDB": { + "openaire_id": "re3data_____::r3d100010066", + "datacite_name": "figshare", + "official_name": "figshare" + }, + "GESIS.NEPS": { + "openaire_id": "re3data_____::r3d100010736", + "datacite_name": "Nationales Bildungspanel", + "official_name": "Nationales Bildungspanel" + }, + "BL.LINCOLN": { + "openaire_id": "re3data_____::r3d100012407", + "datacite_name": "Lincoln repository", + "official_name": "Lincoln repository" + }, + "LDC.CATALOG": { + "openaire_id": "re3data_____::r3d100011940", + "datacite_name": "Linguistic Data Consortium", + "official_name": "Linguistic Data Consortium" + }, + "ZBW.IFO": { + "openaire_id": "re3data_____::r3d100010201", + "datacite_name": "LMU-ifo Economics & Business Data Center", + "official_name": "LMU-ifo Economics & Business Data Center" + }, + "DK.SB": { + "openaire_id": "re3data_____::r3d100012617", + "datacite_name": "LOAR", + "official_name": "LOAR" + }, + "BL.LSHTM": { + "openaire_id": "re3data_____::r3d100011800", + "datacite_name": "LSHTM Data Compass", + "official_name": "LSHTM Data Compass" + }, + "BL.LBORO": { + "openaire_id": "re3data_____::r3d100012143", + "datacite_name": "Loughborough Data Repository", + "official_name": "Loughborough Data Repository" + }, + "DELFT.MAASTRO": { + "openaire_id": "re3data_____::r3d100011086", + "datacite_name": "CancerData.org", + "official_name": "CancerData.org" + }, + "OIBK.OHYCFA": { + "openaire_id": "re3data_____::r3d100013499", + "datacite_name": "Materials Data Repository", + "official_name": "Materials Data Repository" + }, + "MDW.REPOSITORY": { + "openaire_id": "re3data_____::r3d100012108", + "datacite_name": "mdw Repository", + "official_name": "mdw Repository" + }, + "ELSEVIER.MD": { + "openaire_id": "re3data_____::r3d100011868", + "datacite_name": "Mendeley Data", + "official_name": "Mendeley Data" + }, + "BL.MENDELEY": { + "openaire_id": "re3data_____::r3d100011868", + "datacite_name": "Mendeley Data", + "official_name": "Mendeley Data" + }, + "BKMX.AZJWZC": { + "openaire_id": "re3data_____::r3d100011394", + "datacite_name": "B2SHARE", + "official_name": "B2SHARE" + }, + "CSC.NRD": { + "openaire_id": "re3data_____::r3d100012157", + "datacite_name": "Fairdata IDA Research Data Storage Service", + "official_name": "Fairdata IDA Research Data Storage Service" + }, + "UMN.IPUMS": { + "openaire_id": "re3data_____::r3d100010794", + "datacite_name": "Minnesota Population Center", + "official_name": "Minnesota Population Center" + }, + "PHBI.REPO": { + "openaire_id": "re3data_____::r3d100010101", + "datacite_name": "MorphoBank", + "official_name": "MorphoBank" + }, + "TIB.UKON": { + "openaire_id": "re3data_____::r3d100010469", + "datacite_name": "Movebank Data Repository", + "official_name": "Movebank Data Repository" + }, + "INIST.HUMANUM": { + "openaire_id": "re3data_____::r3d100012102", + "datacite_name": "NAKALA", + "official_name": "NAKALA" + }, + "GDCC.NTU": { + "openaire_id": "re3data_____::r3d100012440", + "datacite_name": "DR-NTU (Data)", + "official_name": "DR-NTU (Data)" + }, + "CORNELL.NDACAN": { + "openaire_id": "re3data_____::r3d100011036", + "datacite_name": "National Data Archive on Child Abuse and Neglect", + "official_name": "National Data Archive on Child Abuse and Neglect" + }, + "NOAA.NCEI": { + "openaire_id": "re3data_____::r3d100011801", + "datacite_name": "NCEI", + "official_name": "NCEI" + }, + "GDCC.HARVARD-SLP": { + "openaire_id": "re3data_____::r3d100011861", + "datacite_name": "National Sleep Research Resource", + "official_name": "National Sleep Research Resource" + }, + "NSIDC.DATACTR": { + "openaire_id": "re3data_____::r3d100010110", + "datacite_name": "National Snow and Ice Data Center", + "official_name": "National Snow and Ice Data Center" + }, + "NUS.SB": { + "openaire_id": "re3data_____::r3d100012564", + "datacite_name": "ScholarBank@NUS", + "official_name": "ScholarBank@NUS" + }, + "BL.NHM": { + "openaire_id": "re3data_____::r3d100011675", + "datacite_name": "Natural History Museum, Data Portal", + "official_name": "Natural History Museum, Data Portal" + }, + "ESDIS.ORNL": { + "openaire_id": "re3data_____::r3d100000037", + "datacite_name": "Oak Ridge National Laboratory Distributed Active Archive Center for Biogeochemical Dynamics", + "official_name": "Oak Ridge National Laboratory Distributed Active Archive Center for Biogeochemical Dynamics" + }, + "INIST.OTELO": { + "openaire_id": "re3data_____::r3d100012505", + "datacite_name": "ORDaR", + "official_name": "ORDaR" + }, + "EUROP.ODIN": { + "openaire_id": "re3data_____::r3d100011378", + "datacite_name": "MatDB", + "official_name": "MatDB" + }, + "GDCC.ODUM-DV": { + "openaire_id": "re3data_____::r3d100000005", + "datacite_name": "UNC Dataverse", + "official_name": "UNC Dataverse" + }, + "OHSU.OHSU": { + "openaire_id": "re3data_____::r3d100012244", + "datacite_name": "OHSU Digital Commons", + "official_name": "OHSU Digital Commons" + }, + "KIM.OPENKIM": { + "openaire_id": "re3data_____::r3d100011864", + "datacite_name": "OpenKIM", + "official_name": "OpenKIM" + }, + "COS.OSF": { + "openaire_id": "re3data_____::r3d100011137", + "datacite_name": "Open Science Framework", + "official_name": "Open Science Framework" + }, + "SUL.OPENNEURO": { + "openaire_id": "re3data_____::r3d100010924", + "datacite_name": "OpenNeuro", + "official_name": "OpenNeuro" + }, + "BL.SHEF": { + "openaire_id": "re3data_____::r3d100012124", + "datacite_name": "ORDA - The University of Sheffield Research Data Catalogue and Repository", + "official_name": "ORDA - The University of Sheffield Research Data Catalogue and Repository" + }, + "BL.BROOKES": { + "openaire_id": "re3data_____::r3d100012929", + "datacite_name": "Oxford Brookes University: RADAR", + "official_name": "Oxford Brookes University: RADAR" + }, + "BL.OXDB": { + "openaire_id": "re3data_____::r3d100011653", + "datacite_name": "DataBank, Bodleian Libraries, University of Oxford", + "official_name": "DataBank, Bodleian Libraries, University of Oxford" + }, + "PANGAEA.REPOSITORY": { + "openaire_id": "re3data_____::r3d100010134", + "datacite_name": "PANGAEA", + "official_name": "PANGAEA" + }, + "NASAPDS.NASAPDS": { + "openaire_id": "re3data_____::r3d100010121", + "datacite_name": "PDS", + "official_name": "PDS" + }, + "BF.DISCOVER": { + "openaire_id": "re3data_____::r3d100013148", + "datacite_name": "Blackfynn Discover", + "official_name": "Blackfynn Discover" + }, + "MIT.PHYSIO": { + "openaire_id": "re3data_____::r3d100011561", + "datacite_name": "PhysioNet", + "official_name": "PhysioNet" + }, + "ZBMED.BIOFRESH": { + "openaire_id": "re3data_____::r3d100011651", + "datacite_name": "Freshwater Biodiversity Data Portal", + "official_name": "Freshwater Biodiversity Data Portal" + }, + "PDS.DATA": { + "openaire_id": "re3data_____::r3d100013015", + "datacite_name": "Project Data Sphere", + "official_name": "Project Data Sphere" + }, + "ESTDOI.QDB": { + "openaire_id": "re3data_____::r3d100013451", + "datacite_name": "QsarDB", + "official_name": "QsarDB" + }, + "INIST.ADISP": { + "openaire_id": "re3data_____::r3d100010494", + "datacite_name": "Quetelet PROGEDO Diffusion", + "official_name": "Quetelet PROGEDO Diffusion" + }, + "TIB.RADAR": { + "openaire_id": "re3data_____::r3d100012330", + "datacite_name": "RADAR", + "official_name": "RADAR" + }, + "UCHILE.DATAVERSE": { + "openaire_id": "re3data_____::r3d100013108", + "datacite_name": "Repositorio de Datos de Investigación de la Universidad de Chile", + "official_name": "Repositorio de Datos de Investigación de la Universidad de Chile" + }, + "UDR.RESEARCHDATA": { + "openaire_id": "re3data_____::r3d100013212", + "datacite_name": "Repositorio de datos de investigación de la Universidad del Rosario", + "official_name": "Repositorio de datos de investigación de la Universidad del Rosario" + }, + "GESIS.DIPF": { + "openaire_id": "re3data_____::r3d100010390", + "datacite_name": "Forschungsdatenzentrum Bildung", + "official_name": "Forschungsdatenzentrum Bildung" + }, + "RG.RG": { + "openaire_id": "re3data_____::r3d100012227", + "datacite_name": "ResearchGate Data", + "official_name": "ResearchGate Data" + }, + "INIST.RESIF": { + "openaire_id": "re3data_____::r3d100012222", + "datacite_name": "Résif Seismological Data Portal", + "official_name": "Résif Seismological Data Portal" + }, + "TIB.HZDR": { + "openaire_id": "re3data_____::r3d100012757", + "datacite_name": "RODARE", + "official_name": "RODARE" + }, + "OCUL.SPDV": { + "openaire_id": "re3data_____::r3d100010691", + "datacite_name": "Scholars Portal Dataverse", + "official_name": "Scholars Portal Dataverse" + }, + "PSU.SCHOLAR": { + "openaire_id": "re3data_____::r3d100010701", + "datacite_name": "ScholarSphere", + "official_name": "ScholarSphere" + }, + "TIB.BIKF": { + "openaire_id": "re3data_____::r3d100012379", + "datacite_name": "Senckenberg (meta) data portal", + "official_name": "Senckenberg (meta) data portal" + }, + "GESIS.SHARE": { + "openaire_id": "re3data_____::r3d100010430", + "datacite_name": "Survey of Health, Ageing and Retirement in Europe", + "official_name": "Survey of Health, Ageing and Retirement in Europe" + }, + "BL.HALLAM": { + "openaire_id": "re3data_____::r3d100011909", + "datacite_name": "Sheffield Hallam University Research Data Archive", + "official_name": "Sheffield Hallam University Research Data Archive" + }, + "ETHZ.SICAS": { + "openaire_id": "re3data_____::r3d100011560", + "datacite_name": "Sicas Medical Image Repository", + "official_name": "Sicas Medical Image Repository" + }, + "SUL.SIMTK": { + "openaire_id": "re3data_____::r3d100012486", + "datacite_name": "SimTK", + "official_name": "SimTK" + }, + "SI.SI": { + "openaire_id": "re3data_____::r3d100012274", + "datacite_name": "Smithsonian Research Online", + "official_name": "Smithsonian Research Online" + }, + "CONCOR.KCYDCU": { + "openaire_id": "re3data_____::r3d100012818", + "datacite_name": "Spectrum Research Repository", + "official_name": "Spectrum Research Repository" + }, + "SUL.SDR": { + "openaire_id": "re3data_____::r3d100010710", + "datacite_name": "Stanford Digital Repository", + "official_name": "Stanford Digital Repository" + }, + "SND.SU": { + "openaire_id": "re3data_____::r3d100012147", + "datacite_name": "Stockholm University Figshare Repository", + "official_name": "Stockholm University Figshare Repository" + }, + "INIST.CDS": { + "openaire_id": "re3data_____::r3d100010584", + "datacite_name": "Strasbourg Astronomical Data Center", + "official_name": "Strasbourg Astronomical Data Center" + }, + "DELFT.SURFSARA": { + "openaire_id": "re3data_____::r3d100013084", + "datacite_name": "SURF Data Repository", + "official_name": "SURF Data Repository" + }, + "SND.SMHI": { + "openaire_id": "re3data_____::r3d100011776", + "datacite_name": "Swedish Meteorological and Hydrological Institute open data", + "official_name": "Swedish Meteorological and Hydrological Institute open data" + }, + "SND.SND": { + "openaire_id": "re3data_____::r3d100010146", + "datacite_name": "Swedish National Data Service", + "official_name": "Swedish National Data Service" + }, + "SAGEBIO.SYNAPSE": { + "openaire_id": "re3data_____::r3d100011894", + "datacite_name": "Synapse", + "official_name": "Synapse" + }, + "GDCC.SYR-QDR": { + "openaire_id": "re3data_____::r3d100011038", + "datacite_name": "Qualitative Data Repository", + "official_name": "Qualitative Data Repository" + }, + "FZJ.TERENO": { + "openaire_id": "re3data_____::r3d100011471", + "datacite_name": "TERENO Data Discovery Portal", + "official_name": "TERENO Data Discovery Portal" + }, + "TUW.TETHYS": { + "openaire_id": "re3data_____::r3d100013400", + "datacite_name": "Tethys", + "official_name": "Tethys" + }, + "GESIS.AUSSDA": { + "openaire_id": "re3data_____::r3d100010483", + "datacite_name": "AUSSDA Dataverse", + "official_name": "AUSSDA Dataverse" + }, + "CCDC.CSD": { + "openaire_id": "re3data_____::r3d100010197", + "datacite_name": "The Cambridge Structural Database", + "official_name": "The Cambridge Structural Database" + }, + "SML.TCIA": { + "openaire_id": "re3data_____::r3d100011559", + "datacite_name": "The Cancer Imaging Archive", + "official_name": "The Cancer Imaging Archive" + }, + "SI.CDA": { + "openaire_id": "re3data_____::r3d100010035", + "datacite_name": "The Chandra Data Archive", + "official_name": "The Chandra Data Archive" + }, + "HLQC.ZNXELI": { + "openaire_id": "re3data_____::r3d100013029", + "datacite_name": "TUdatalib", + "official_name": "TUdatalib" + }, + "TIB.TUHH": { + "openaire_id": "re3data_____::r3d100013076", + "datacite_name": "TUHH Open Research - Research Data TUHH", + "official_name": "TUHH Open Research - Research Data TUHH" + }, + "BL.UEL": { + "openaire_id": "re3data_____::r3d100012414", + "datacite_name": "UEL Research Repository", + "official_name": "UEL Research Repository" + }, + "ARFM.UFZDRP": { + "openaire_id": "re3data_____::r3d100013674", + "datacite_name": "Datenrechercheportal UFZ", + "official_name": "Datenrechercheportal UFZ" + }, + "BL.UKDA": { + "openaire_id": "re3data_____::r3d100010215", + "datacite_name": "UK Data Archive", + "official_name": "UK Data Archive" + }, + "GDCC.ODUM-LIBRARY": { + "openaire_id": "re3data_____::r3d100000005", + "datacite_name": "UNC Dataverse", + "official_name": "UNC Dataverse" + }, + "CRUI.UNIBO": { + "openaire_id": "re3data_____::r3d100012604", + "datacite_name": "AMS Acta", + "official_name": "AMS Acta" + }, + "LMU.UB": { + "openaire_id": "re3data_____::r3d100010731", + "datacite_name": "Open Data LMU", + "official_name": "Open Data LMU" + }, + "INIST.IFSTTAR": { + "openaire_id": "re3data_____::r3d100013062", + "datacite_name": "Data Univ Gustave Eiffel", + "official_name": "Data Univ Gustave Eiffel" + }, + "BL.UCLD": { + "openaire_id": "re3data_____::r3d100012417", + "datacite_name": "UCL Discovery", + "official_name": "UCL Discovery" + }, + "NZAU.DATA": { + "openaire_id": "re3data_____::r3d100012110", + "datacite_name": "University of Auckland Data Repository", + "official_name": "University of Auckland Data Repository" + }, + "BL.BATH": { + "openaire_id": "re3data_____::r3d100011947", + "datacite_name": "University of Bath Research Data Archive", + "official_name": "University of Bath Research Data Archive" + }, + "BL.BRISTOL": { + "openaire_id": "re3data_____::r3d100011099", + "datacite_name": "data.bris Research Data Repository", + "official_name": "data.bris Research Data Repository" + }, + "FIGSHARE.UCT": { + "openaire_id": "re3data_____::r3d100012633", + "datacite_name": "ZivaHub", + "official_name": "ZivaHub" + }, + "BL.UCLAN": { + "openaire_id": "re3data_____::r3d100012019", + "datacite_name": "UCLanData", + "official_name": "UCLanData" + }, + "BL.ED": { + "openaire_id": "re3data_____::r3d100000047", + "datacite_name": "Edinburgh DataShare", + "official_name": "Edinburgh DataShare" + }, + "BL.ESSEX": { + "openaire_id": "re3data_____::r3d100012405", + "datacite_name": "Research Data at Essex", + "official_name": "Research Data at Essex" + }, + "BL.EXETER": { + "openaire_id": "re3data_____::r3d100011202", + "datacite_name": "Open Research Exeter", + "official_name": "Open Research Exeter" + }, + "BL.HERTS": { + "openaire_id": "re3data_____::r3d100013116", + "datacite_name": "University of Hertfordshire Research Archive", + "official_name": "University of Hertfordshire Research Archive" + }, + "NKN.NKN": { + "openaire_id": "re3data_____::r3d100011587", + "datacite_name": "Northwest Knowledge Network", + "official_name": "Northwest Knowledge Network" + }, + "BL.LEEDS": { + "openaire_id": "re3data_____::r3d100011945", + "datacite_name": "Research Data Leeds Repository", + "official_name": "Research Data Leeds Repository" + }, + "UNIMELB.REPO1": { + "openaire_id": "re3data_____::r3d100012145", + "datacite_name": "melbourne.figshare.com", + "official_name": "melbourne.figshare.com" + }, + "BL.READING": { + "openaire_id": "re3data_____::r3d100012064", + "datacite_name": "University of Reading Research Data Archive", + "official_name": "University of Reading Research Data Archive" + }, + "BL.SALFORD": { + "openaire_id": "re3data_____::r3d100012144", + "datacite_name": "University of Salford Data Repository", + "official_name": "University of Salford Data Repository" + }, + "BL.SOTON": { + "openaire_id": "re3data_____::r3d100011245", + "datacite_name": "University of Southampton Institutional Research Repository", + "official_name": "University of Southampton Institutional Research Repository" + }, + "ARDCX.USQ": { + "openaire_id": "re3data_____::r3d100011638", + "datacite_name": "University of Southern Queensland research data collection", + "official_name": "University of Southern Queensland research data collection" + }, + "BL.STANDREW": { + "openaire_id": "re3data_____::r3d100012411", + "datacite_name": "St Andrews Research portal - Research Data", + "official_name": "St Andrews Research portal - Research Data" + }, + "BL.STRATH": { + "openaire_id": "re3data_____::r3d100012412", + "datacite_name": "University of Strathclyde KnowledgeBase Datasets", + "official_name": "University of Strathclyde KnowledgeBase Datasets" + }, + "BL.SURREY": { + "openaire_id": "re3data_____::r3d100012232", + "datacite_name": "Surrey Research Insight", + "official_name": "Surrey Research Insight" + }, + "USDA.USDA": { + "openaire_id": "re3data_____::r3d100011890", + "datacite_name": "Ag Data Commons", + "official_name": "Ag Data Commons" + }, + "USGS.PROD": { + "openaire_id": "re3data_____::r3d100010054", + "datacite_name": "U.S. Geological Survey", + "official_name": "U.S. Geological Survey" + }, + "DELFT.UU": { + "openaire_id": "re3data_____::r3d100012623", + "datacite_name": "Yoda", + "official_name": "Yoda" + }, + "VT.VTECHDATA": { + "openaire_id": "re3data_____::r3d100012601", + "datacite_name": "Virginia Tech Data Repository", + "official_name": "Virginia Tech Data Repository" + }, + "JCVI.EIVBWB": { + "openaire_id": "re3data_____::r3d100011931", + "datacite_name": "Virus Pathogen Resource", + "official_name": "Virus Pathogen Resource" + }, + "VIVLI.SEARCH": { + "openaire_id": "re3data_____::r3d100012823", + "datacite_name": "Vivli", + "official_name": "Vivli" + }, + "DELFT.VLIZ": { + "openaire_id": "re3data_____::r3d100010661", + "datacite_name": "Flanders Marine Institute", + "official_name": "Flanders Marine Institute" + }, + "WH.WHOAS": { + "openaire_id": "re3data_____::r3d100010423", + "datacite_name": "Woods Hole Open Access Server", + "official_name": "Woods Hole Open Access Server" + }, + "DKRZ.WDCC": { + "openaire_id": "re3data_____::r3d100010299", + "datacite_name": "World Data Center for Climate", + "official_name": "World Data Center for Climate" + }, + "ETHZ.WGMS": { + "openaire_id": "re3data_____::r3d100010627", + "datacite_name": "World Glacier Monitoring Service", + "official_name": "World Glacier Monitoring Service" + }, + "ZBW.ZBW-JDA": { + "openaire_id": "re3data_____::r3d100012190", + "datacite_name": "ZBW Journal Data Archive", + "official_name": "ZBW Journal Data Archive" + }, + "CERN.ZENODO": { + "openaire_id": "re3data_____::r3d100010468", + "datacite_name": "Zenodo", + "official_name": "Zenodo" + }, + "ZBW.ZEW": { + "openaire_id": "re3data_____::r3d100010399", + "datacite_name": "ZEW Forschungsdatenzentrum", + "official_name": "ZEW Forschungsdatenzentrum" + } } \ No newline at end of file