diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml
index 012ff89a3..44165995d 100644
--- a/dhp-build/dhp-build-assembly-resources/pom.xml
+++ b/dhp-build/dhp-build-assembly-resources/pom.xml
@@ -6,7 +6,7 @@
eu.dnetlib.dhp
dhp-build
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
dhp-build-assembly-resources
diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml
index 256017e2c..7579bdf45 100644
--- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml
+++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml
@@ -6,7 +6,7 @@
eu.dnetlib.dhp
dhp-build
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
dhp-build-properties-maven-plugin
diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml
index db0097d64..5a86efe17 100644
--- a/dhp-build/dhp-code-style/pom.xml
+++ b/dhp-build/dhp-code-style/pom.xml
@@ -5,7 +5,7 @@
eu.dnetlib.dhp
dhp-code-style
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
jar
@@ -47,12 +47,16 @@
org.apache.maven.plugins
maven-site-plugin
3.9.1
+
+ true
+
+
UTF-8
sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop
diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml
index 97fbdf45b..9040ea94e 100644
--- a/dhp-build/pom.xml
+++ b/dhp-build/pom.xml
@@ -4,7 +4,7 @@
eu.dnetlib.dhp
dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
dhp-build
pom
diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml
index 7b18f0105..6df11f4ea 100644
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@@ -5,7 +5,7 @@
eu.dnetlib.dhp
dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
../pom.xml
diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml
index f63f3aa01..29e1fab1f 100644
--- a/dhp-workflows/dhp-actionmanager/pom.xml
+++ b/dhp-workflows/dhp-actionmanager/pom.xml
@@ -4,7 +4,7 @@
eu.dnetlib.dhp
dhp-workflows
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
dhp-actionmanager
diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml
index c89cc9d1d..53d349d2a 100644
--- a/dhp-workflows/dhp-aggregation/pom.xml
+++ b/dhp-workflows/dhp-aggregation/pom.xml
@@ -4,7 +4,7 @@
eu.dnetlib.dhp
dhp-workflows
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
dhp-aggregation
diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala
index 136b0fb93..8e41de83c 100644
--- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala
+++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala
@@ -72,7 +72,6 @@ class DataciteToOAFTest extends AbstractVocabularyTest {
spark.read.load(targetPath).printSchema();
-
val result: Dataset[Oaf] = spark.read.load(targetPath).as[Oaf]
result
diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml
index 047767d33..479a9e8c6 100644
--- a/dhp-workflows/dhp-blacklist/pom.xml
+++ b/dhp-workflows/dhp-blacklist/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml
index afbd93dcb..5a41a0090 100644
--- a/dhp-workflows/dhp-broker-events/pom.xml
+++ b/dhp-workflows/dhp-broker-events/pom.xml
@@ -1,11 +1,9 @@
-
+
dhp-workflows
eu.dnetlib.dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml
index f47f8ba6e..a14076a78 100644
--- a/dhp-workflows/dhp-dedup-openaire/pom.xml
+++ b/dhp-workflows/dhp-dedup-openaire/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
4.0.0
dhp-dedup-openaire
diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml
index 8c10538c0..c3d3a7375 100644
--- a/dhp-workflows/dhp-distcp/pom.xml
+++ b/dhp-workflows/dhp-distcp/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml
index f496ea9a2..37accbc4f 100644
--- a/dhp-workflows/dhp-doiboost/pom.xml
+++ b/dhp-workflows/dhp-doiboost/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
index 0cb08ea94..b4d6d67e2 100644
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
@@ -446,16 +446,12 @@ case object Crossref2Oaf {
case "10.13039/501100000781" =>
generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
- case "10.13039/100000001" =>
- generateSimpleRelationFromAward(funder, "nsf_________", a => a)
- case "10.13039/501100001665" =>
- generateSimpleRelationFromAward(funder, "anr_________", a => a)
- case "10.13039/501100002341" =>
- generateSimpleRelationFromAward(funder, "aka_________", a => a)
+ case "10.13039/100000001" => generateSimpleRelationFromAward(funder, "nsf_________", a => a)
+ case "10.13039/501100001665" => generateSimpleRelationFromAward(funder, "anr_________", a => a)
+ case "10.13039/501100002341" => generateSimpleRelationFromAward(funder, "aka_________", a => a)
case "10.13039/501100001602" =>
- generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", ""))
- case "10.13039/501100000923" =>
- generateSimpleRelationFromAward(funder, "arc_________", a => a)
+ generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""))
+ case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a)
case "10.13039/501100000038" =>
val targetId = getProjectId("nserc_______", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
@@ -468,14 +464,10 @@ case object Crossref2Oaf {
val targetId = getProjectId("cihr________", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
- case "10.13039/501100002848" =>
- generateSimpleRelationFromAward(funder, "conicytf____", a => a)
- case "10.13039/501100003448" =>
- generateSimpleRelationFromAward(funder, "gsrt________", extractECAward)
- case "10.13039/501100010198" =>
- generateSimpleRelationFromAward(funder, "sgov________", a => a)
- case "10.13039/501100004564" =>
- generateSimpleRelationFromAward(funder, "mestd_______", extractECAward)
+ case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a)
+ case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward)
+ case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a => a)
+ case "10.13039/501100004564" => generateSimpleRelationFromAward(funder, "mestd_______", extractECAward)
case "10.13039/501100003407" =>
generateSimpleRelationFromAward(funder, "miur________", a => a)
val targetId = getProjectId("miur________", "1e5e62235d094afd01cd56e65112fc63")
@@ -487,15 +479,11 @@ case object Crossref2Oaf {
"irb_hr______",
a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", "")
)
- case "10.13039/501100006769" =>
- generateSimpleRelationFromAward(funder, "rsf_________", a => a)
- case "10.13039/501100001711" =>
- generateSimpleRelationFromAward(funder, "snsf________", snsfRule)
- case "10.13039/501100004410" =>
- generateSimpleRelationFromAward(funder, "tubitakf____", a => a)
- case "10.10.13039/100004440" =>
- generateSimpleRelationFromAward(funder, "wt__________", a => a)
+ case "10.13039/501100006769" => generateSimpleRelationFromAward(funder, "rsf_________", a => a)
+ case "10.13039/501100001711" => generateSimpleRelationFromAward(funder, "snsf________", snsfRule)
+ case "10.13039/501100004410" => generateSimpleRelationFromAward(funder, "tubitakf____", a => a)
case "10.13039/100004440" =>
+ generateSimpleRelationFromAward(funder, "wt__________", a => a)
val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
@@ -516,6 +504,7 @@ case object Crossref2Oaf {
case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
generateSimpleRelationFromAward(funder, "conicytf____", extractECAward)
case "Wellcome Trust Masters Fellowship" =>
+ generateSimpleRelationFromAward(funder, "wt__________", a => a)
val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml
index 0b4269acd..591cad252 100644
--- a/dhp-workflows/dhp-enrichment/pom.xml
+++ b/dhp-workflows/dhp-enrichment/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
4.0.0
@@ -51,7 +51,7 @@
eu.dnetlib.dhp
dhp-aggregation
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
compile
diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml
index 202c86a6d..271ccbf72 100644
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml
@@ -274,7 +274,6 @@
--sourcePath${sourcePath}/publication
--preparedInfoPath${workingDir}/publication
- --saveGraph${saveGraph}
--resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
--outputPath${outputPath}/publication
@@ -304,7 +303,6 @@
--sourcePath${sourcePath}/dataset
--preparedInfoPath${workingDir}/dataset
- --saveGraph${saveGraph}
--resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
--outputPath${outputPath}/dataset
@@ -334,7 +332,6 @@
--sourcePath${sourcePath}/otherresearchproduct
--preparedInfoPath${workingDir}/otherresearchproduct
- --saveGraph${saveGraph}
--resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
--outputPath${outputPath}/otherresearchproduct
@@ -364,7 +361,6 @@
--sourcePath${sourcePath}/software
--preparedInfoPath${workingDir}/software
- --saveGraph${saveGraph}
--resultTableNameeu.dnetlib.dhp.schema.oaf.Software
--outputPath${outputPath}/software
diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml
index 17146903a..687f0de66 100644
--- a/dhp-workflows/dhp-graph-mapper/pom.xml
+++ b/dhp-workflows/dhp-graph-mapper/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java
new file mode 100644
index 000000000..b20dcb67b
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java
@@ -0,0 +1,113 @@
+package eu.dnetlib.dhp.oa.graph.clean;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.Context;
+import eu.dnetlib.dhp.schema.oaf.Oaf;
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
+import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.utils.ISLookupClientFactory;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+public class CleanContextSparkJob implements Serializable {
+ private static final Logger log = LoggerFactory.getLogger(CleanContextSparkJob.class);
+
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ public static void main(String[] args) throws Exception {
+
+ String jsonConfiguration = IOUtils
+ .toString(
+ CleanContextSparkJob.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json"));
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ Boolean isSparkSessionManaged = Optional
+ .ofNullable(parser.get("isSparkSessionManaged"))
+ .map(Boolean::valueOf)
+ .orElse(Boolean.TRUE);
+ log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+ String inputPath = parser.get("inputPath");
+ log.info("inputPath: {}", inputPath);
+
+ String workingPath = parser.get("workingPath");
+ log.info("workingPath: {}", workingPath);
+
+ String contextId = parser.get("contextId");
+ log.info("contextId: {}", contextId);
+
+ String verifyParam = parser.get("verifyParam");
+ log.info("verifyParam: {}", verifyParam);
+
+
+ String graphTableClassName = parser.get("graphTableClassName");
+ log.info("graphTableClassName: {}", graphTableClassName);
+
+ Class extends Result> entityClazz = (Class extends Result>) Class.forName(graphTableClassName);
+
+
+ SparkConf conf = new SparkConf();
+ runWithSparkSession(
+ conf,
+ isSparkSessionManaged,
+ spark -> {
+
+ cleanContext(spark, contextId, verifyParam, inputPath, entityClazz, workingPath);
+ });
+ }
+
+ private static void cleanContext(SparkSession spark, String contextId, String verifyParam, String inputPath, Class entityClazz, String workingPath) {
+ Dataset res = spark
+ .read()
+ .textFile(inputPath)
+ .map(
+ (MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz),
+ Encoders.bean(entityClazz));
+
+ res.map((MapFunction) r -> {
+ if(!r.getTitle()
+ .stream()
+ .filter(t -> t.getQualifier().getClassid()
+ .equalsIgnoreCase(ModelConstants.MAIN_TITLE_QUALIFIER.getClassid()))
+ .anyMatch(t -> t.getValue().toLowerCase().startsWith(verifyParam.toLowerCase()))){
+ return r;
+ }
+ r.setContext(r.getContext().stream().filter(c -> !c.getId().split("::")[0]
+ .equalsIgnoreCase(contextId)).collect(Collectors.toList()));
+ return r;
+ } ,Encoders.bean(entityClazz))
+ .write()
+ .mode(SaveMode.Overwrite)
+ .option("compression","gzip")
+ .json(workingPath);
+
+ spark.read().textFile(workingPath).map((MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz),
+ Encoders.bean(entityClazz))
+ .write()
+ .mode(SaveMode.Overwrite)
+ .option("compression","gzip")
+ .json(inputPath);
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java
index f7c0d501f..4de687ec7 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java
@@ -4,8 +4,6 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj;
import java.io.Serializable;
import java.util.List;
-import com.fasterxml.jackson.annotation.JsonIgnore;
-
public class BibJson implements Serializable {
private Editorial editorial;
private PidScheme pid_scheme;
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json
new file mode 100644
index 000000000..e3d31d69f
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json
@@ -0,0 +1,37 @@
+[
+ {
+ "paramName": "issm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "when true will stop SparkSession after job execution",
+ "paramRequired": false
+ },
+ {
+ "paramName": "in",
+ "paramLongName": "inputPath",
+ "paramDescription": "the path to the graph data dump to read",
+ "paramRequired": true
+ },
+ {
+ "paramName": "wp",
+ "paramLongName": "workingPath",
+ "paramDescription": "the path to store the output graph",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ci",
+ "paramLongName": "contextId",
+ "paramDescription": "the id of the context to be removed",
+ "paramRequired": true
+ },
+ {
+ "paramName": "class",
+ "paramLongName": "graphTableClassName",
+ "paramDescription": "class name moelling the graph table",
+ "paramRequired": true
+ },{
+ "paramName": "vf",
+ "paramLongName": "verifyParam",
+ "paramDescription": "the parameter to be verified to remove the context",
+ "paramRequired": true
+}
+]
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java
new file mode 100644
index 000000000..bba814346
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java
@@ -0,0 +1,159 @@
+package eu.dnetlib.dhp.oa.graph.clean;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import eu.dnetlib.dhp.oa.graph.dump.Constants;
+import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest;
+import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.dump.oaf.Instance;
+import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult;
+import eu.dnetlib.dhp.schema.oaf.Publication;
+import eu.dnetlib.dhp.schema.oaf.Software;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+import org.apache.commons.io.FileUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Locale;
+
+public class CleanContextTest {
+
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ private static SparkSession spark;
+
+ private static Path workingDir;
+
+ private static final Logger log = LoggerFactory.getLogger(CleanContextTest.class);
+
+ @BeforeAll
+ public static void beforeAll() throws IOException {
+ workingDir = Files.createTempDirectory(DumpJobTest.class.getSimpleName());
+ log.info("using work dir {}", workingDir);
+
+ SparkConf conf = new SparkConf();
+ conf.setAppName(DumpJobTest.class.getSimpleName());
+
+ conf.setMaster("local[*]");
+ conf.set("spark.driver.host", "localhost");
+ conf.set("hive.metastore.local", "true");
+ conf.set("spark.ui.enabled", "false");
+ conf.set("spark.sql.warehouse.dir", workingDir.toString());
+ conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+ spark = SparkSession
+ .builder()
+ .appName(DumpJobTest.class.getSimpleName())
+ .config(conf)
+ .getOrCreate();
+ }
+
+ @AfterAll
+ public static void afterAll() throws IOException {
+ FileUtils.deleteDirectory(workingDir.toFile());
+ spark.stop();
+ }
+
+ @Test
+ public void testResultClean() throws Exception {
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/clean/publication_clean_context.json")
+ .getPath();
+ final String prefix = "gcube ";
+
+
+ spark.read().textFile(sourcePath).map((MapFunction) r -> OBJECT_MAPPER.readValue(r, Publication.class), Encoders.bean(Publication.class))
+ .write().json(workingDir.toString() + "/publication");
+
+
+ CleanContextSparkJob.main(new String[] {
+ "--isSparkSessionManaged", Boolean.FALSE.toString(),
+ "--inputPath", workingDir.toString() + "/publication",
+ "-graphTableClassName", Publication.class.getCanonicalName(),
+ "-workingPath", workingDir.toString() + "/working",
+ "-contextId","sobigdata",
+ "-verifyParam","gCube "
+ });
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+ JavaRDD tmp = sc
+ .textFile(workingDir.toString() + "/publication")
+ .map(item -> OBJECT_MAPPER.readValue(item, Publication.class));
+
+ Assertions.assertEquals(7, tmp.count());
+
+ //original result with sobigdata context and gcube as starting string in the main title for the publication
+ Assertions.assertEquals(0,
+ tmp.filter(p->p.getId().equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")).collect().get(0).getContext().size());
+
+ //original result with sobigdata context without gcube as starting string in the main title for the publication
+ Assertions.assertEquals(1,
+ tmp.filter(p->p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")).collect().get(0).getContext().size());
+ Assertions.assertEquals("sobigdata::projects::2",tmp.filter(p->p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")).collect().get(0).getContext().get(0).getId() );
+
+ //original result with sobigdata context with gcube as starting string in the subtitle
+ Assertions.assertEquals(1,
+ tmp.filter(p->p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")).collect().get(0).getContext().size());
+ Assertions.assertEquals("sobigdata::projects::2",tmp.filter(p->p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")).collect().get(0).getContext().get(0).getId() );
+ List titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")).collect().get(0).getTitle();
+ Assertions.assertEquals(1, titles.size());
+ Assertions.assertTrue(titles.get(0).getValue().toLowerCase().startsWith(prefix) );
+ Assertions.assertEquals("subtitle", titles.get(0).getQualifier().getClassid());
+
+ //original result with sobigdata context with gcube not as starting string in the main title
+ Assertions.assertEquals(1,
+ tmp.filter(p->p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")).collect().get(0).getContext().size());
+ Assertions.assertEquals("sobigdata::projects::1",tmp.filter(p->p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")).collect().get(0).getContext().get(0).getId() );
+ titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")).collect().get(0).getTitle();
+ Assertions.assertEquals(1, titles.size());
+ Assertions.assertFalse(titles.get(0).getValue().toLowerCase().startsWith(prefix) );
+ Assertions.assertTrue(titles.get(0).getValue().toLowerCase().contains(prefix.trim()) );
+ Assertions.assertEquals("main title", titles.get(0).getQualifier().getClassid());
+
+ //original result with sobigdata in context and also other contexts with gcube as starting string for the main title
+ Assertions.assertEquals(1,
+ tmp.filter(p->p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")).collect().get(0).getContext().size());
+ Assertions.assertEquals("dh-ch",tmp.filter(p->p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")).collect().get(0).getContext().get(0).getId() );
+ titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")).collect().get(0).getTitle();
+ Assertions.assertEquals(1, titles.size());
+ Assertions.assertTrue(titles.get(0).getValue().toLowerCase().startsWith(prefix) );
+ Assertions.assertEquals("main title", titles.get(0).getQualifier().getClassid());
+
+ //original result with multiple main title one of which whith gcube as starting string and with 2 contextes
+ Assertions.assertEquals(1,
+ tmp.filter(p->p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")).collect().get(0).getContext().size());
+ Assertions.assertEquals("dh-ch",tmp.filter(p->p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")).collect().get(0).getContext().get(0).getId() );
+ titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")).collect().get(0).getTitle();
+ Assertions.assertEquals(2, titles.size());
+ Assertions.assertTrue(titles.stream().anyMatch(t -> t.getQualifier().getClassid().equals("main title") && t.getValue().toLowerCase().startsWith(prefix)) );
+
+
+ //original result without sobigdata in context with gcube as starting string for the main title
+ Assertions.assertEquals(1,
+ tmp.filter(p->p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")).collect().get(0).getContext().size());
+ Assertions.assertEquals("dh-ch",tmp.filter(p->p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")).collect().get(0).getContext().get(0).getId() );
+ titles = tmp.filter(p -> p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")).collect().get(0).getTitle();
+ Assertions.assertEquals(2, titles.size());
+
+ Assertions.assertTrue(titles.stream().anyMatch(t -> t.getQualifier().getClassid().equals("main title") && t.getValue().toLowerCase().startsWith(prefix)));
+
+
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_context.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_context.json
new file mode 100644
index 000000000..b7c51d810
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_context.json
@@ -0,0 +1,7 @@
+{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::2"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T16:14:18.452Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Lit.opg., bijl."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::0224aae28af558f21768dbc6439c7a95","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"8250"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282676557,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:51:24Z","harvestDate":"2020-05-25T11:33:13.427Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550013110","metadataNamespace":""}},"originalId":["DansKnawCris::0224aae28af558f21768dbc6439c7a95"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013110"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013110"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"prospectie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Gcube veldonderzoek d.m.v. boringen (karterende fase) : Raadhuisstraat te Dirkshorn, gemeente Harenkarspel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Synthegra Archeologie Rapportenreeks P0502381"}],"journal":null}
+{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::2"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:03:57.761Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount": {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1396"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283087415,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:58:39Z","harvestDate":"2020-05-25T11:34:38.707Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800020324","metadataNamespace":""}},"originalId":["DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800020324"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800020324"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null}
+{"author":[{"affiliation":[],"fullname":"van Someren, Christian","name":"Christian","pid":[],"rank":1,"surname":"van Someren"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::2"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-11-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T17:55:32.27Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Over het Energieakkoord. In het energieakkoord voor duurzame groei is afgesproken dat in 2020 14 procent van de opwek hernieuwbaar moet zijn en in 2023 16 procent. De doelstelling is een uitdagende opgave waarbij de eerste vraag is: \"Hoeveel hernieuwbare energie wordt er op dit moment opgewekt in Nederland?\" Deze website geeft antwoord op de vraag voor de actueel opgewekte windenergie, zonne-energie en biogas."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-11-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":["http://energieopwek.nl/"]}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282663379,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2020-05-08T17:46:55Z","harvestDate":"2020-05-25T15:30:24.079Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/hbo:oai:hbokennisbank.nl:hanzepure:oai:research.hanze.nl:publications/813a5dfa-4fd0-44c4-8cbf-310324dc724d","metadataNamespace":""}},"originalId":["DansKnawCris::3c81248c335f0aa07e06817ece6fa6af"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:hs:18-813a5dfa-4fd0-44c4-8cbf-310324dc724d"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:hs:18-813a5dfa-4fd0-44c4-8cbf-310324dc724d"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0038","classname":"0038","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"energieproductie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Management"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Monitoring"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Policy and Law"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Energie interventies en gedrag"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"publieke ondersteuning en communicatie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Professional practice & society"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"subtitle","classname":"subtitle","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"GCUBE opwek"}],"journal":null}
+{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::1"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:55:40.568Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282670417,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:41:34Z","harvestDate":"2020-05-25T11:40:05.974Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550052278","metadataNamespace":""}},"originalId":["DansKnawCris::3c9f068ddc930360bec6925488a9a97f"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550052278"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550052278"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None GCUBE"}],"journal":null}
+{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"},{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::1"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T18:11:57.737Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Met lit. opg"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::4669a378a73661417182c208e6fdab53","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282758835,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:36:00Z","harvestDate":"2020-05-25T11:30:47.199Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800007467","metadataNamespace":""}},"originalId":["DansKnawCris::4669a378a73661417182c208e6fdab53"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800007467"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800007467"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Vlaardingen"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"gCube RAAP-rapport, ISSN 0925-6229 2089"}],"journal":null}
+{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"},{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::1"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T18:18:18.049Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Met literatuuropgave"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283010899,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:48:12Z","harvestDate":"2020-05-25T11:32:46.363Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800014509","metadataNamespace":""}},"originalId":["DansKnawCris::4a9152e80f860eab99072e921d74a0ff"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800014509"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800014509"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Bladel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Proefsleuvenonderzoek aan de Bredasebaan 8 te Bladel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"gcube Archeodienst rapport, ISSN 1877-2900 565"}],"journal":null}
+{"author":[{"affiliation":null,"fullname":"Schubart, A.","name":"A.","pid":[],"rank":1,"surname":"Schubart"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"dataInfo":null,"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"dataInfo":null,"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8"},"dateofacceptance":{"dataInfo":null,"value":"1866-01-01"},"dateofcollection":"2019-02-17T01:33:44Z","dateoftransformation":null,"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"n/a"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"dateofacceptance":{"dataInfo":null,"value":"1866-01-01"},"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.pdf","http://dx.doi.org/10.1524/phil.1866.24.14.561"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"dateofacceptance":null,"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":null,"value":"cc0"},"processingchargeamount":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2578.35"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":["https://zenodo.org/record/1634826/files/article.pdf"]},{"accessright":null,"collectedfrom":{"dataInfo":null,"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"dateofacceptance":null,"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","https://academic.microsoft.com/#/detail/2601803436"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1866-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://zenodo.org/record/1634826"]}],"language":null,"lastupdatetimestamp":1591290751234,"oaiprovenance":null,"originalId":["10.1524/phil.1866.24.14.561","2601803436","od______2659::f9ba286bcf8429160b58072028052325"],"pid":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1524/phil.1866.24.14.561"}],"publisher":{"dataInfo":null,"value":"Walter de Gruyter GmbH"},"relevantdate":[{"dataInfo":null,"qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2017-03-26T10:09:50Z"},{"dataInfo":null,"qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"1866-01-01"}],"resourcetype":{"classid":"Other","classname":"Other","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":null,"value":"Crossref"},{"dataInfo":null,"value":null}],"subject":[{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Theology"},{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"History"},{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Art history"}],"title":[{"dataInfo":null,"qualifier":{"classid":"alternative title","classname":"alternative title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"XIV. Die wörter άγαλμα, είχών, ξόανον, ανδρuις und verwandte, in ihren verschiedenen beziehungen. Nach Pausanias"},{"dataInfo":null,"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"gcube xiv die worter άγαλμα eίχών ξόανον ανδρuις und verwandte in ihren verschiedenen beziehungen nach pausanias"}],"journal":{"conferencedate":null,"conferenceplace":null,"dataInfo":null,"edition":null,"ep":null,"iss":null,"issnLinking":null,"issnOnline":"2196-7008","issnPrinted":"0031-7985","name":"Philologus","sp":null,"vol":"24"}}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml
index e402d0600..ae8f3b53a 100644
--- a/dhp-workflows/dhp-graph-provision/pom.xml
+++ b/dhp-workflows/dhp-graph-provision/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/dhp-stats-promote/pom.xml b/dhp-workflows/dhp-stats-promote/pom.xml
index c64c2f58e..ce3e739a5 100644
--- a/dhp-workflows/dhp-stats-promote/pom.xml
+++ b/dhp-workflows/dhp-stats-promote/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
4.0.0
dhp-stats-promote
diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml
index 52f35ff07..2bc610c42 100644
--- a/dhp-workflows/dhp-stats-update/pom.xml
+++ b/dhp-workflows/dhp-stats-update/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
4.0.0
dhp-stats-update
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
index a95ee74d8..24e6bff7e 100755
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
@@ -12,6 +12,8 @@ and (ri.accessright = 'Open Access'
or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
on p.id= tmp.id;
+compute stats indi_pub_green_oa;
+
create table indi_pub_grey_lit stored as parquet as
select distinct p.id, coalesce(grey_lit, 0) as grey_lit
from publication p
@@ -22,6 +24,8 @@ join result_classifications rt on rt.id = p.id
where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and
not exists (select 1 from result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id;
+compute stats indi_pub_grey_lit;
+
create table indi_pub_doi_from_crossref stored as parquet as
select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref
from publication p
@@ -31,6 +35,7 @@ join datasource d on d.id = ri.collectedfrom
where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp
on tmp.id=p.id;
+compute stats indi_pub_doi_from_crossref;
---- Sprint 2 ----
create table indi_result_has_cc_licence stored as parquet as
select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
@@ -40,6 +45,8 @@ join result_licenses as license on license.id = r.id
where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
on r.id= tmp.id;
+compute stats indi_result_has_cc_licence;
+
create table indi_result_has_cc_licence_url stored as parquet as
select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url
from result r
@@ -49,16 +56,21 @@ join result_licenses as license on license.id = r.id
WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp
on r.id= tmp.id;
+compute stats indi_result_has_cc_licence_url;
+
create table indi_pub_has_abstract stored as parquet as
select distinct publication.id, coalesce(abstract, 1) has_abstract
from publication;
+compute stats indi_pub_has_abstract;
+
create table indi_result_with_orcid stored as parquet as
select distinct r.id, coalesce(has_orcid, 0) as has_orcid
from result r
left outer join (select id, 1 as has_orcid from result_orcid) tmp
on r.id= tmp.id;
+compute stats indi_result_with_orcid;
---- Sprint 3 ----
create table indi_funded_result_with_fundref stored as parquet as
@@ -68,6 +80,8 @@ left outer join (select distinct id, 1 as fundref from project_results
where provenance='Harvested') tmp
on r.id= tmp.id;
+compute stats indi_funded_result_with_fundref;
+
create table indi_result_org_country_collab stored as parquet as
with tmp as
(select o.id as id, o.country , ro.id as result,r.type from organization o
@@ -79,6 +93,8 @@ join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id and o1.country<>o2.country
group by o1.id, o1.type,o2.country;
+compute stats indi_result_org_country_collab;
+
create table indi_result_org_collab stored as parquet as
with tmp as
(select o.id, ro.id as result,r.type from organization o
@@ -90,6 +106,8 @@ join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id
group by o1.id, o2.id, o1.type;
+compute stats indi_result_org_collab;
+
create table indi_funder_country_collab stored as parquet as
with tmp as (select funder, project, country from organization_projects op
join organization o on o.id=op.id
@@ -101,6 +119,8 @@ join tmp as f2 on f1.project=f2.project
where f1.country<>f2.country
group by f1.funder, f2.country, f1.country;
+compute stats indi_funder_country_collab;
+
create table indi_result_country_collab stored as parquet as
with tmp as
(select country, ro.id as result,r.type from organization o
@@ -112,6 +132,8 @@ join tmp as o2 on o1.result=o2.result
where o1.country<>o2.country
group by o1.country, o2.country, o1.type;
+compute stats indi_result_country_collab;
+
---- Sprint 4 ----
create table indi_pub_diamond stored as parquet as
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
@@ -123,6 +145,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli
and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
on pd.id=tmp.id;
+compute stats indi_pub_diamond;
+
create table indi_pub_hybrid stored as parquet as
select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid
from publication_datasources pd
@@ -133,6 +157,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli
and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp
on pd.id=tmp.id;
+compute stats indi_pub_hybrid;
+
create table indi_pub_in_transformative stored as parquet as
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
from publication pd
@@ -143,6 +169,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli
and ps.is_transformative_journal=true) tmp
on pd.id=tmp.id;
+compute stats indi_pub_in_transformative;
+
create table indi_pub_closed_other_open stored as parquet as
select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri
left outer join
@@ -153,11 +181,12 @@ where d.type like '%Journal%' and ri.accessright='Closed Access' and
(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp
on tmp.id=ri.id;
-
+compute stats indi_pub_closed_other_open;
---- Sprint 5 ----
create table indi_result_no_of_copies stored as parquet as
select id, count(id) as number_of_copies from result_instance group by id;
+compute stats indi_result_no_of_copies;
---- Sprint 6 ----
create table indi_pub_gold_oa stored as parquet as
WITH gold_oa AS (
@@ -183,6 +212,8 @@ LEFT OUTER JOIN (
JOIN issn on issn.id=pd.datasource
JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
+compute stats indi_pub_gold_oa;
+
create table indi_datasets_gold_oa stored as parquet as
WITH gold_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn
@@ -210,6 +241,8 @@ LEFT OUTER JOIN (
JOIN issn on issn.id=pd.datasource
JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
+compute stats indi_datasets_gold_oa;
+
create table indi_software_gold_oa stored as parquet as
WITH gold_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn
@@ -237,6 +270,8 @@ LEFT OUTER JOIN (
JOIN issn on issn.id=pd.datasource
JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
+compute stats indi_software_gold_oa;
+
create table indi_org_findable stored as parquet as
with result_with_pid as (
select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro
@@ -263,6 +298,8 @@ join result_with_pid_share on result_with_pid_share.organization=allresults.orga
left outer join (
select organization, abstract_share from result_with_abstract_share) tmp on tmp.organization=allresults.organization;
+compute stats indi_org_findable;
+
create table indi_org_openess stored as parquet as
WITH datasets_oa as (
SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa dg
@@ -313,6 +350,8 @@ left outer join (
left outer join (
select organization,s from allsoftwaresshare) tmp1 on tmp1.organization=allpubsshare.organization;
+compute stats indi_org_openess;
+
create table indi_pub_hybrid_oa_with_cc stored as parquet as
WITH hybrid_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn
@@ -343,6 +382,8 @@ LEFT OUTER JOIN (
JOIN indi_result_has_cc_licence cc on pd.id=cc.id
where cc.has_cc_license=1) tmp on pd.id=tmp.id;
+compute stats indi_pub_hybrid_oa_with_cc;
+
create table indi_pub_downloads stored as parquet as
SELECT result_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
join publication on result_id=id
@@ -350,6 +391,8 @@ where downloads>0
GROUP BY result_id
order by no_dowloads desc;
+compute stats indi_pub_downloads;
+
create table indi_pub_downloads_datasource stored as parquet as
SELECT result_id, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
join publication on result_id=id
@@ -357,15 +400,21 @@ where downloads>0
GROUP BY result_id, repository_id
order by result_id;
+compute stats indi_pub_downloads_datasource;
+
create table indi_pub_downloads_year stored as parquet as
SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
join publication on result_id=id where downloads>0
GROUP BY result_id, `year`
order by `year` asc;
+compute stats indi_pub_downloads_year;
+
create table indi_pub_downloads_datasource_year stored as parquet as
SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
join publication on result_id=id
where downloads>0
GROUP BY result_id, repository_id, `year`
order by `year` asc, result_id;
+
+compute stats indi_pub_downloads_datasource_year;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-usage-raw-data-update/pom.xml b/dhp-workflows/dhp-usage-raw-data-update/pom.xml
index a78f92d41..954c8bd39 100644
--- a/dhp-workflows/dhp-usage-raw-data-update/pom.xml
+++ b/dhp-workflows/dhp-usage-raw-data-update/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
4.0.0
dhp-usage-raw-data-update
diff --git a/dhp-workflows/dhp-usage-stats-build/pom.xml b/dhp-workflows/dhp-usage-stats-build/pom.xml
index 20d2f5b76..54e18580b 100644
--- a/dhp-workflows/dhp-usage-stats-build/pom.xml
+++ b/dhp-workflows/dhp-usage-stats-build/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
4.0.0
dhp-usage-stats-build
diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml
index b1c51c497..8c71a5ca1 100644
--- a/dhp-workflows/dhp-workflow-profiles/pom.xml
+++ b/dhp-workflows/dhp-workflow-profiles/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml
index 143178560..541d59007 100644
--- a/dhp-workflows/pom.xml
+++ b/dhp-workflows/pom.xml
@@ -6,7 +6,7 @@
eu.dnetlib.dhp
dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index 6a14954e5..136b9b867 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3,7 +3,7 @@
4.0.0
eu.dnetlib.dhp
dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
pom
@@ -551,6 +551,9 @@
org.apache.maven.plugins
maven-site-plugin
3.9.1
+
+ ${dhp.site.skip}
+
@@ -791,6 +794,7 @@
2.4.0.cloudera2
2.9.6
3.5
+ true
11.0.2
2.11.12
5.6.1