added filter for typenorm field

minor
base transformation rule test
2024-03-12 10:09:38 +01:00 · 2024-03-12 09:20:37 +01:00 · 2024-03-11 14:48:37 +01:00 · 2024-03-11 11:44:04 +01:00 · 2024-03-11 10:50:17 +01:00 · 2024-03-08 15:31:08 +01:00
86 changed files with 1596 additions and 3998 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
@ -312,8 +312,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 		}

 		if (value instanceof Datasource) {
-			final Datasource d = (Datasource) value;
-			return Objects.nonNull(d.getOfficialname()) && StringUtils.isNotBlank(d.getOfficialname().getValue());
+			// nothing to evaluate here
 		} else if (value instanceof Project) {
 			final Project p = (Project) value;
 			return Objects.nonNull(p.getCode()) && StringUtils.isNotBlank(p.getCode().getValue());
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java
@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2024.
- * SPDX-FileCopyrightText: © 2023 Consiglio Nazionale delle Ricerche
- * SPDX-License-Identifier: AGPL-3.0-or-later
- */
-
-package eu.dnetlib.dhp.actionmanager.promote;
-
-/** Encodes the Actionset promotion strategies */
-public class PromoteAction {
-
-	/** The supported actionset promotion strategies
-	 *
-	 * ENRICH: promotes only records in the actionset matching another record in the
-	 *  graph and enriches them applying the given MergeAndGet strategy
-	 * UPSERT: promotes all the records in an actionset, matching records are updated
-	 *  using the given MergeAndGet strategy, the non-matching record as inserted as they are.
-	 */
-	public enum Strategy {
-		ENRICH, UPSERT
-	}
-
-	/**
-	 * Returns the string representation of the join type implementing the given PromoteAction.
-	 *
-	 * @param strategy the strategy to be used to promote the Actionset contents
-	 * @return the join type used to implement the promotion strategy
-	 */
-	public static String joinTypeForStrategy(PromoteAction.Strategy strategy) {
-		switch (strategy) {
-			case ENRICH:
-				return "left_outer";
-			case UPSERT:
-				return "full_outer";
-			default:
-				throw new IllegalStateException("unsupported PromoteAction: " + strategy.toString());
-		}
-	}
-}
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java
@ -67,9 +67,8 @@ public class PromoteActionPayloadForGraphTableJob {
 		String outputGraphTablePath = parser.get("outputGraphTablePath");
 		logger.info("outputGraphTablePath: {}", outputGraphTablePath);

-		MergeAndGet.Strategy mergeAndGetStrategy = MergeAndGet.Strategy
-			.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
-		logger.info("mergeAndGetStrategy: {}", mergeAndGetStrategy);
+		MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
+		logger.info("strategy: {}", strategy);

 		Boolean shouldGroupById = Optional
 			.ofNullable(parser.get("shouldGroupById"))
@ -77,12 +76,6 @@ public class PromoteActionPayloadForGraphTableJob {
 			.orElse(true);
 		logger.info("shouldGroupById: {}", shouldGroupById);

-		PromoteAction.Strategy promoteActionStrategy = Optional
-			.ofNullable(parser.get("promoteActionStrategy"))
-			.map(PromoteAction.Strategy::valueOf)
-			.orElse(PromoteAction.Strategy.UPSERT);
-		logger.info("promoteActionStrategy: {}", promoteActionStrategy);
-
 		@SuppressWarnings("unchecked")
 		Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
 		@SuppressWarnings("unchecked")
@ -104,8 +97,7 @@ public class PromoteActionPayloadForGraphTableJob {
 					inputGraphTablePath,
 					inputActionPayloadPath,
 					outputGraphTablePath,
-					mergeAndGetStrategy,
-					promoteActionStrategy,
+					strategy,
 					rowClazz,
 					actionPayloadClazz,
 					shouldGroupById);
@ -132,16 +124,14 @@ public class PromoteActionPayloadForGraphTableJob {
 		String inputGraphTablePath,
 		String inputActionPayloadPath,
 		String outputGraphTablePath,
-		MergeAndGet.Strategy mergeAndGetStrategy,
-		PromoteAction.Strategy promoteActionStrategy,
+		MergeAndGet.Strategy strategy,
 		Class<G> rowClazz,
 		Class<A> actionPayloadClazz, Boolean shouldGroupById) {
 		Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz);
 		Dataset<A> actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);

 		Dataset<G> result = promoteActionPayloadForGraphTable(
-			rowDS, actionPayloadDS, mergeAndGetStrategy, promoteActionStrategy, rowClazz, actionPayloadClazz,
-			shouldGroupById)
+			rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz, shouldGroupById)
 				.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));

 		saveGraphTable(result, outputGraphTablePath);
@ -193,8 +183,7 @@ public class PromoteActionPayloadForGraphTableJob {
 	private static <G extends Oaf, A extends Oaf> Dataset<G> promoteActionPayloadForGraphTable(
 		Dataset<G> rowDS,
 		Dataset<A> actionPayloadDS,
-		MergeAndGet.Strategy mergeAndGetStrategy,
-		PromoteAction.Strategy promoteActionStrategy,
+		MergeAndGet.Strategy strategy,
 		Class<G> rowClazz,
 		Class<A> actionPayloadClazz,
 		Boolean shouldGroupById) {
@ -206,9 +195,8 @@ public class PromoteActionPayloadForGraphTableJob {

 		SerializableSupplier<Function<G, String>> rowIdFn = ModelSupport::idFn;
 		SerializableSupplier<Function<A, String>> actionPayloadIdFn = ModelSupport::idFn;
-		SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet
-			.functionFor(mergeAndGetStrategy);
-		SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(mergeAndGetStrategy);
+		SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy);
+		SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(strategy);
 		SerializableSupplier<G> zeroFn = zeroFn(rowClazz);
 		SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSourceAndTarget;

@ -219,7 +207,6 @@ public class PromoteActionPayloadForGraphTableJob {
 				rowIdFn,
 				actionPayloadIdFn,
 				mergeRowWithActionPayloadAndGetFn,
-				promoteActionStrategy,
 				rowClazz,
 				actionPayloadClazz);

--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java
@ -34,7 +34,6 @@ public class PromoteActionPayloadFunctions {
 	 * @param rowIdFn Function used to get the id of graph table row
 	 * @param actionPayloadIdFn Function used to get id of action payload instance
 	 * @param mergeAndGetFn Function used to merge graph table row and action payload instance
-	 * @param promoteActionStrategy the Actionset promotion strategy
 	 * @param rowClazz Class of graph table
 	 * @param actionPayloadClazz Class of action payload
 	 * @param <G> Type of graph table row
@ -47,7 +46,6 @@ public class PromoteActionPayloadFunctions {
 		SerializableSupplier<Function<G, String>> rowIdFn,
 		SerializableSupplier<Function<A, String>> actionPayloadIdFn,
 		SerializableSupplier<BiFunction<G, A, G>> mergeAndGetFn,
-		PromoteAction.Strategy promoteActionStrategy,
 		Class<G> rowClazz,
 		Class<A> actionPayloadClazz) {
 		if (!isSubClass(rowClazz, actionPayloadClazz)) {
@ -63,7 +61,7 @@ public class PromoteActionPayloadFunctions {
 			.joinWith(
 				actionPayloadWithIdDS,
 				rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")),
-				PromoteAction.joinTypeForStrategy(promoteActionStrategy))
+				"full_outer")
 			.map(
 				(MapFunction<Tuple2<Tuple2<String, G>, Tuple2<String, A>>, G>) value -> {
 					Optional<G> rowOpt = Optional.ofNullable(value._1()).map(Tuple2::_2);
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json
@ -41,12 +41,6 @@
    "paramDescription": "strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET",
    "paramRequired": true
  },
-  {
-    "paramName": "pas",
-    "paramLongName": "promoteActionStrategy",
-    "paramDescription": "strategy for promoting the actionset contents into the graph tables, ENRICH or UPSERT (default)",
-    "paramRequired": false
-  },
  {
    "paramName": "sgid",
    "paramLongName": "shouldGroupById",
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml
@ -115,7 +115,6 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputGraphTablePath</arg><arg>${workingDir}/dataset</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
-            <arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="DecisionPromoteResultActionPayloadForDatasetTable"/>
@ -168,7 +167,6 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/dataset</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
-            <arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml
@ -106,7 +106,6 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/datasource</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
-            <arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml
@ -106,7 +106,6 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/organization</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
-            <arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml
@ -114,7 +114,6 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputGraphTablePath</arg><arg>${workingDir}/otherresearchproduct</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
-            <arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="DecisionPromoteResultActionPayloadForOtherResearchProductTable"/>
@ -167,7 +166,6 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/otherresearchproduct</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
-            <arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml
@ -106,7 +106,6 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/project</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
-            <arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml
@ -115,7 +115,6 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputGraphTablePath</arg><arg>${workingDir}/publication</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
-            <arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="DecisionPromoteResultActionPayloadForPublicationTable"/>
@ -168,7 +167,6 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/publication</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
-            <arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml
@ -107,7 +107,6 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/relation</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
-            <arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml
@ -114,7 +114,6 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputGraphTablePath</arg><arg>${workingDir}/software</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
-            <arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="DecisionPromoteResultActionPayloadForSoftwareTable"/>
@ -167,7 +166,6 @@
            <arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
            <arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/software</arg>
            <arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
-            <arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
            <arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java
+++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java
@ -54,7 +54,7 @@ public class PromoteActionPayloadFunctionsTest {
 				RuntimeException.class,
 				() -> PromoteActionPayloadFunctions
 					.joinGraphTableWithActionPayloadAndMerge(
-						null, null, null, null, null, null, OafImplSubSub.class, OafImpl.class));
+						null, null, null, null, null, OafImplSubSub.class, OafImpl.class));
 		}

 		@Test
@ -104,7 +104,6 @@ public class PromoteActionPayloadFunctionsTest {
 					rowIdFn,
 					actionPayloadIdFn,
 					mergeAndGetFn,
-					PromoteAction.Strategy.UPSERT,
 					OafImplSubSub.class,
 					OafImplSubSub.class)
 				.collectAsList();
@ -184,7 +183,6 @@ public class PromoteActionPayloadFunctionsTest {
 					rowIdFn,
 					actionPayloadIdFn,
 					mergeAndGetFn,
-					PromoteAction.Strategy.UPSERT,
 					OafImplSubSub.class,
 					OafImplSub.class)
 				.collectAsList();
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java
@ -58,7 +58,7 @@ public class CollectorWorker extends ReportingJob {

 	public void collect() throws UnknownCollectorPluginException, CollectorException, IOException {

-		final String outputPath = mdStoreVersion.getHdfsPath() + SEQUENCE_FILE_NAME;
+		final String outputPath = this.mdStoreVersion.getHdfsPath() + SEQUENCE_FILE_NAME;
 		log.info("outputPath path is {}", outputPath);

 		final CollectorPlugin plugin = getCollectorPlugin();
@ -68,36 +68,36 @@ public class CollectorWorker extends ReportingJob {

 		try (SequenceFile.Writer writer = SequenceFile
 			.createWriter(
-				fileSystem.getConf(),
-				SequenceFile.Writer.file(new Path(outputPath)),
-				SequenceFile.Writer.keyClass(IntWritable.class),
-				SequenceFile.Writer.valueClass(Text.class),
+				this.fileSystem.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer
+					.keyClass(IntWritable.class),
+				SequenceFile.Writer
+					.valueClass(Text.class),
 				SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) {
 			final IntWritable key = new IntWritable(counter.get());
 			final Text value = new Text();
 			plugin
-				.collect(api, report)
-				.forEach(
-					content -> {
-						key.set(counter.getAndIncrement());
-						value.set(content);
-						try {
-							writer.append(key, value);
-						} catch (Throwable e) {
-							throw new RuntimeException(e);
-						}
-					});
-		} catch (Throwable e) {
-			report.put(e.getClass().getName(), e.getMessage());
+				.collect(this.api, this.report)
+				.forEach(content -> {
+					key.set(counter.getAndIncrement());
+					value.set(content);
+					try {
+						writer.append(key, value);
+					} catch (final Throwable e) {
+						throw new RuntimeException(e);
+					}
+				});
+		} catch (final Throwable e) {
+			this.report.put(e.getClass().getName(), e.getMessage());
 			throw new CollectorException(e);
 		} finally {
 			shutdown();
-			report.ongoing(counter.longValue(), counter.longValue());
+			this.report.ongoing(counter.longValue(), counter.longValue());
 		}
 	}

-	private void scheduleReport(AtomicInteger counter) {
+	private void scheduleReport(final AtomicInteger counter) {
 		schedule(new ReporterCallback() {
+
 			@Override
 			public Long getCurrent() {
 				return counter.longValue();
@ -112,33 +112,33 @@ public class CollectorWorker extends ReportingJob {

 	private CollectorPlugin getCollectorPlugin() throws UnknownCollectorPluginException {

-		switch (CollectorPlugin.NAME.valueOf(api.getProtocol())) {
+		switch (CollectorPlugin.NAME.valueOf(this.api.getProtocol())) {
 			case oai:
-				return new OaiCollectorPlugin(clientParams);
+				return new OaiCollectorPlugin(this.clientParams);
 			case rest_json2xml:
-				return new RestCollectorPlugin(clientParams);
+				return new RestCollectorPlugin(this.clientParams);
 			case file:
-				return new FileCollectorPlugin(fileSystem);
+				return new FileCollectorPlugin(this.fileSystem);
 			case fileGzip:
-				return new FileGZipCollectorPlugin(fileSystem);
+				return new FileGZipCollectorPlugin(this.fileSystem);
 			case baseDump:
 				return new BaseCollectorPlugin(this.fileSystem);
 			case other:
 				final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
-					.ofNullable(api.getParams().get("other_plugin_type"))
+					.ofNullable(this.api.getParams().get("other_plugin_type"))
 					.map(CollectorPlugin.NAME.OTHER_NAME::valueOf)
 					.orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type"));

 				switch (plugin) {
 					case mdstore_mongodb_dump:
-						return new MongoDbDumpCollectorPlugin(fileSystem);
+						return new MongoDbDumpCollectorPlugin(this.fileSystem);
 					case mdstore_mongodb:
 						return new MDStoreCollectorPlugin();
 					default:
 						throw new UnknownCollectorPluginException("plugin is not managed: " + plugin);
 				}
 			default:
-				throw new UnknownCollectorPluginException("protocol is not managed: " + api.getProtocol());
+				throw new UnknownCollectorPluginException("protocol is not managed: " + this.api.getProtocol());
 		}
 	}

--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseAnalyzerJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseAnalyzerJob.java
@ -0,0 +1,379 @@
+
+package eu.dnetlib.dhp.collection.plugin.base;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+import static org.apache.spark.sql.functions.col;
+import static org.apache.spark.sql.functions.count;
+
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.ObjectUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.math.NumberUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.DeflateCodec;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.dom4j.Document;
+import org.dom4j.DocumentException;
+import org.dom4j.DocumentHelper;
+import org.dom4j.Element;
+import org.dom4j.Node;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.DbClient;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import scala.Tuple2;
+
+public class BaseAnalyzerJob {
+
+	private static final String BASE_DUMP = "BASE_DUMP";
+	private static final Logger log = LoggerFactory.getLogger(BaseAnalyzerJob.class);
+
+	/**
+	 * Entry point of the BASE dump analyzer.
+	 *
+	 * Reads the job arguments and, inside a Spark session, executes in sequence the steps whose index is greater
+	 * than or equal to the {@code fromStep} argument:
+	 * <ol start="0">
+	 * <li>LoadRecords: {@link #loadRecords(String, String)}</li>
+	 * <li>Base Report: {@link #generateReport(SparkSession, String, String)}</li>
+	 * <li>OpenDOAR Report: {@link #generateOpenDoarReport(SparkSession, String, String, List)}</li>
+	 * <li>Type Vocabulary Report: {@link #generateVocTypeReport(SparkSession, String, String)}</li>
+	 * </ol>
+	 *
+	 * @param args the command line arguments, described by {@code action_set_parameters.json}
+	 * @throws Exception propagated from the argument parsing or from any of the executed steps
+	 */
+	public static void main(final String[] args) throws Exception {
+
+		final String jsonConfiguration = IOUtils
+			.toString(
+				BaseAnalyzerJob.class
+					.getResourceAsStream("/eu/dnetlib/dhp/collection/plugin/base/action_set_parameters.json"));
+
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+
+		parser.parseArgument(args);
+
+		final Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+
+		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+		final String inputPath = parser.get("inputPath");
+		log.info("inputPath: {}", inputPath);
+
+		final String dataPath = parser.get("dataPath");
+		log.info("dataPath: {}", dataPath);
+
+		final String outputPath = parser.get("outputPath");
+		log.info("outputPath: {}", outputPath);
+
+		final String opendoarPath = parser.get("opendoarPath");
+		log.info("opendoarPath: {}", opendoarPath);
+
+		final String typesReportPath = parser.get("typesReportPath");
+		log.info("typesReportPath: {}", typesReportPath);
+
+		final int fromStep = Integer.parseInt(parser.get("fromStep"));
+		log.info("fromStep: {}", fromStep);
+
+		final String dbUrl = parser.get("postgresUrl");
+		log.info("postgresUrl: {}", dbUrl);
+
+		final String dbUser = parser.get("postgresUser");
+		log.info("postgresUser: {}", dbUser);
+
+		final String dbPassword = parser.get("postgresPassword");
+		// the credential must never reach the logs: report only whether it has been provided
+		log.info("postgresPassword: {}", StringUtils.isBlank(dbPassword) ? "<empty>" : "*****");
+
+		final SparkConf conf = new SparkConf();
+
+		runWithSparkSession(conf, isSparkSessionManaged, spark -> {
+			if (fromStep <= 0) {
+				logStepBanner("EXECUTING STEP 0: LoadRecords");
+				loadRecords(inputPath, dataPath);
+				logStepBanner("EXECUTING STEP 0: DONE");
+			}
+
+			if (fromStep <= 1) {
+				logStepBanner("EXECUTING STEP 1: Base Report");
+				generateReport(spark, dataPath, outputPath);
+				logStepBanner("EXECUTING STEP 1: DONE");
+			}
+
+			if (fromStep <= 2) {
+				logStepBanner("EXECUTING STEP 2: OpenDOAR Report");
+				generateOpenDoarReport(spark, outputPath, opendoarPath, loadOpenDoarStats(dbUrl, dbUser, dbPassword));
+				logStepBanner("EXECUTING STEP 2: DONE");
+			}
+
+			if (fromStep <= 3) {
+				logStepBanner("EXECUTING STEP 3: Type Vocabulary Report");
+				generateVocTypeReport(spark, outputPath, typesReportPath);
+				logStepBanner("EXECUTING STEP 3: DONE");
+			}
+		});
+
+	}
+
+	/**
+	 * Logs a message framed by two rows of asterisks, used to mark the beginning and the end of each step.
+	 *
+	 * @param message the text to show inside the banner
+	 */
+	private static void logStepBanner(final String message) {
+		final String frame = "**************************************";
+		log.info("\n{}\n* {}\n{}", frame, message, frame);
+	}
+
+	private static void generateVocTypeReport(final SparkSession spark,
+		final String reportPath,
+		final String typesReportPath) {
+		spark
+			.read()
+			.parquet(reportPath)
+			.as(Encoders.bean(BaseRecordInfo.class))
+			.flatMap(rec -> {
+				final List<Tuple2<String, String>> list = new ArrayList<>();
+				for (final String t1 : rec.getTypes()) {
+					if (t1.startsWith("TYPE_NORM:")) {
+						for (final String t2 : rec.getTypes()) {
+							if (t2.startsWith("TYPE:")) {
+								list
+									.add(
+										new Tuple2<>(StringUtils.substringAfter(t1, "TYPE_NORM:").trim(),
+											StringUtils.substringAfter(t2, "TYPE:").trim()));
+							}
+						}
+					}
+				}
+				return list.iterator();
+			}, Encoders.tuple(Encoders.STRING(), Encoders.STRING()))
+			.distinct()
+			.write()
+			.mode(SaveMode.Overwrite)
+			.format("parquet")
+			.save(typesReportPath);
+
+	}
+
+	/**
+	 * Writes the OpenDOAR report, obtained by joining the repositories loaded from the database with the
+	 * repositories referenced by the collections of the base report.
+	 *
+	 * The BASE side is built by exploding the {@code collections} of the report, keeping those with a non-empty
+	 * {@code opendoarId}, and counting the occurrences of each {@code opendoar____::<opendoarId>} identifier.
+	 * The two sides are combined with a full outer join on the id and merged by
+	 * {@link #merge(OpenDoarRepoStatus, OpenDoarRepoStatus)}.
+	 *
+	 * @param spark the Spark session
+	 * @param reportPath the path of the parquet files of the base report
+	 * @param opendoarPath the path where the result is saved as parquet (existing data is overwritten)
+	 * @param repos the repositories obtained from the database
+	 */
+	private static void generateOpenDoarReport(final SparkSession spark,
+		final String reportPath,
+		final String opendoarPath,
+		final List<OpenDoarRepoStatus> repos) {
+
+		final Dataset<OpenDoarRepoStatus> dbRepos = spark
+			.createDataset(repos, Encoders.bean(OpenDoarRepoStatus.class));
+
+		final Dataset<OpenDoarRepoStatus> baseRepos = spark
+			.read()
+			.parquet(reportPath)
+			.selectExpr("explode(collections) as collection")
+			.where("isnotnull(collection.opendoarId) and character_length(collection.opendoarId)>0")
+			.selectExpr("concat('opendoar____::',collection.opendoarId) as id")
+			.groupBy(col("id"))
+			.agg(count(col("id")))
+			.map(row -> {
+				// column 0 is the repository id, column 1 the number of its occurrences in the dump
+				final long occurrences = row.getLong(1);
+				final OpenDoarRepoStatus status = new OpenDoarRepoStatus();
+				status.setId(row.getString(0));
+				status.getAggregations().put(BASE_DUMP, occurrences);
+				status.setBaseCount(occurrences);
+				status.setOpenaireCount(0);
+				status.setHighCompliance(false);
+				return status;
+			}, Encoders.bean(OpenDoarRepoStatus.class));
+
+		dbRepos
+			.joinWith(baseRepos, dbRepos.col("id").equalTo(baseRepos.col("id")), "full_outer")
+			.map(pair -> merge(pair._1, pair._2), Encoders.bean(OpenDoarRepoStatus.class))
+			.write()
+			.mode(SaveMode.Overwrite)
+			.format("parquet")
+			.save(opendoarPath);
+	}
+
+	/**
+	 * Merges two statuses of the same repository.
+	 *
+	 * When one of the arguments is null the other one is returned as it is. Otherwise a new object is created:
+	 * id and jurisdiction are the first non-null values, the aggregations of {@code r2} are added on top of those
+	 * of {@code r1}, the high compliance flag is set if it is set on at least one side and the counters are the
+	 * maximum of the two sides.
+	 *
+	 * @param r1 the first status (may be null)
+	 * @param r2 the second status (may be null)
+	 * @return the merged status
+	 */
+	private static OpenDoarRepoStatus merge(final OpenDoarRepoStatus r1, final OpenDoarRepoStatus r2) {
+		if ((r1 == null) || (r2 == null)) {
+			return r1 == null ? r2 : r1;
+		}
+
+		final OpenDoarRepoStatus merged = new OpenDoarRepoStatus();
+
+		merged.setId(ObjectUtils.firstNonNull(r1.getId(), r2.getId()));
+		merged.setJurisdiction(ObjectUtils.firstNonNull(r1.getJurisdiction(), r2.getJurisdiction()));
+
+		final boolean highCompliance = r1.isHighCompliance() || r2.isHighCompliance();
+		merged.setHighCompliance(highCompliance);
+
+		merged.setBaseCount(Math.max(r1.getBaseCount(), r2.getBaseCount()));
+		merged.setOpenaireCount(Math.max(r1.getOpenaireCount(), r2.getOpenaireCount()));
+
+		// the order matters: on equal keys the values of r2 replace those of r1
+		merged.getAggregations().putAll(r1.getAggregations());
+		merged.getAggregations().putAll(r2.getAggregations());
+
+		return merged;
+	}
+
+	/**
+	 * Loads from the database the aggregation status of the OpenDOAR repositories, using the query stored in the
+	 * classpath resource {@code opendoar-aggregation-status.sql}.
+	 *
+	 * Each element of the {@code aggregations} array is expected in the form {@code <api>@@@<count>}: the counts
+	 * are stored by api and their sum becomes the openaire count of the repository. A missing array and null
+	 * elements are ignored instead of failing with a NullPointerException.
+	 *
+	 * @param dbUrl the url of the database
+	 * @param dbUser the user of the database
+	 * @param dbPassword the password of the database
+	 * @return the list of the repositories returned by the query
+	 * @throws Exception propagated from the database client or from the loading of the query
+	 */
+	private static List<OpenDoarRepoStatus> loadOpenDoarStats(final String dbUrl,
+		final String dbUser,
+		final String dbPassword) throws Exception {
+		final List<OpenDoarRepoStatus> repos = new ArrayList<>();
+
+		try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) {
+
+			final String sql = IOUtils
+				.toString(
+					BaseAnalyzerJob.class
+						.getResourceAsStream(
+							"/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-aggregation-status.sql"));
+
+			dbClient.processResults(sql, row -> {
+				try {
+					final OpenDoarRepoStatus repo = new OpenDoarRepoStatus();
+					repo.setId(row.getString("id"));
+					repo.setJurisdiction(row.getString("jurisdiction"));
+					repo.setBaseCount(0);
+					repo.setHighCompliance(false);
+
+					// getArray() returns null when the column is SQL NULL
+					final java.sql.Array aggregations = row.getArray("aggregations");
+					final String[] entries = aggregations != null ? (String[]) aggregations.getArray() : new String[0];
+
+					long sum = 0;
+					for (final String s : entries) {
+						if (s == null) {
+							// a NULL element carries neither an api nor a count
+							continue;
+						}
+						final String api = StringUtils.substringBefore(s, "@@@");
+						final long count = NumberUtils.toLong(StringUtils.substringAfter(s, "@@@"), 0);
+						sum += count;
+						repo.getAggregations().put(api, count);
+						// This should recognize the HIGH Compliances: openaire*X.Y*
+						if (s.contains("compliance: openaire")) {
+							repo.setHighCompliance(true);
+						}
+					}
+					repo.setOpenaireCount(sum);
+
+					repos.add(repo);
+					log.info("# FOUND OPENDOAR (DB): {}", repo.getId());
+				} catch (final SQLException e) {
+					log.error("Error in SQL", e);
+					throw new RuntimeException("Error in SQL", e);
+				}
+			});
+		}
+		return repos;
+	}
+
+	/**
+	 * Copies the records of a BASE dump into a block-compressed sequence file, using the progressive number of the
+	 * record (starting from 1) as key and the record itself as value.
+	 *
+	 * @param inputPath the path of the dump, read through a {@link BaseCollectorIterator}
+	 * @param outputPath the path of the sequence file to create
+	 * @throws Exception if the file system cannot be accessed or a record cannot be written
+	 */
+	private static void loadRecords(final String inputPath, final String outputPath) throws Exception {
+		// FileSystem.get(...) returns an instance shared through the FileSystem cache: closing it here would
+		// invalidate it for any other component still holding it. newInstance(...) creates a private instance
+		// that can be safely closed at the end of this method.
+		try (final FileSystem fs = FileSystem.newInstance(new Configuration());
+			final AggregatorReport report = new AggregatorReport()) {
+
+			final AtomicLong recordsCounter = new AtomicLong(0);
+
+			final LongWritable key = new LongWritable();
+			final Text value = new Text();
+
+			try (final SequenceFile.Writer writer = SequenceFile
+				.createWriter(
+					fs.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer
+						.keyClass(LongWritable.class),
+					SequenceFile.Writer
+						.valueClass(Text.class),
+					SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) {
+
+				final BaseCollectorIterator iterator = new BaseCollectorIterator(fs, new Path(inputPath), report);
+
+				while (iterator.hasNext()) {
+					final String record = iterator.next();
+
+					final long i = recordsCounter.incrementAndGet();
+					if ((i % 10000) == 0) {
+						log.info("# Loaded records: {}", i);
+					}
+
+					key.set(i);
+					value.set(record);
+					try {
+						writer.append(key, value);
+					} catch (final Throwable e1) {
+						throw new RuntimeException(e1);
+					}
+				}
+
+				log.info("# COMPLETED - Loaded records: {}", recordsCounter.get());
+			}
+		}
+	}
+
+	/**
+	 * Generates the base report: each value of the input sequence file is converted to a {@link BaseRecordInfo}
+	 * by {@link #extractInfo(String)} and the result is saved as parquet.
+	 *
+	 * @param spark the Spark session
+	 * @param inputPath the path of the sequence file (LongWritable keys, Text values)
+	 * @param targetPath the path where the report is saved (existing data is overwritten)
+	 * @throws Exception declared for the callers, see the single steps for the possible failures
+	 */
+	private static void generateReport(final SparkSession spark,
+		final String inputPath,
+		final String targetPath) throws Exception {
+
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+		final JavaRDD<BaseRecordInfo> infos = sc
+			.sequenceFile(inputPath, LongWritable.class, Text.class)
+			.map(entry -> extractInfo(entry._2.toString()));
+
+		final Dataset<BaseRecordInfo> report = spark.createDataset(infos.rdd(), Encoders.bean(BaseRecordInfo.class));
+
+		report
+			.write()
+			.mode(SaveMode.Overwrite)
+			.format("parquet")
+			.save(targetPath);
+	}
+
+	/**
+	 * Extracts the information of interest from the XML of a record.
+	 *
+	 * The result contains: the identifier found in the header, the distinct paths of all the elements and
+	 * attributes, the values of the {@code type} and {@code typenorm} elements (prefixed by {@code TYPE: } and
+	 * {@code TYPE_NORM: }) and one {@link BaseCollectionInfo} for each non-blank {@code collection} element.
+	 *
+	 * @param s the XML of the record
+	 * @return the extracted information
+	 * @throws RuntimeException if the XML cannot be parsed
+	 */
+	protected static BaseRecordInfo extractInfo(final String s) {
+		final Document record;
+		try {
+			record = DocumentHelper.parseText(s);
+		} catch (final DocumentException e) {
+			throw new RuntimeException(e);
+		}
+
+		final Set<String> xpaths = new LinkedHashSet<>();
+		final Set<String> typeValues = new LinkedHashSet<>();
+		final List<BaseCollectionInfo> collections = new ArrayList<>();
+
+		for (final Object o : record.selectNodes("//*|//@*")) {
+			// the path is registered for elements and attributes
+			xpaths.add(((Node) o).getPath());
+
+			if (!(o instanceof Element)) {
+				continue;
+			}
+
+			final Element element = (Element) o;
+			final String elementName = element.getName();
+
+			if ("collection".equals(elementName)) {
+				final String collectionName = element.getText().trim();
+				if (StringUtils.isBlank(collectionName)) {
+					continue;
+				}
+				final BaseCollectionInfo collection = new BaseCollectionInfo();
+				collection.setId(collectionName);
+				collection.setOpendoarId(element.valueOf("@opendoar_id").trim());
+				collection.setRorId(element.valueOf("@ror_id").trim());
+				collections.add(collection);
+			} else if ("type".equals(elementName)) {
+				typeValues.add("TYPE: " + element.getText().trim());
+			} else if ("typenorm".equals(elementName)) {
+				typeValues.add("TYPE_NORM: " + element.getText().trim());
+			}
+		}
+
+		final BaseRecordInfo result = new BaseRecordInfo();
+		result.setId(record.valueOf("//*[local-name() = 'header']/*[local-name() = 'identifier']").trim());
+		result.getTypes().addAll(typeValues);
+		result.getPaths().addAll(xpaths);
+		result.setCollections(collections);
+
+		return result;
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectionInfo.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectionInfo.java
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java
@ -45,22 +45,15 @@ public class BaseCollectorPlugin implements CollectorPlugin {

 	@Override
 	public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report) throws CollectorException {
-		// the path of the dump file on HDFS
-		// http://oai.base-search.net/initial_load/base_oaipmh_dump-current.tar
-		// it could be downloaded from iis-cdh5-test-gw.ocean.icm.edu.pl and then copied on HDFS
+		// get path to file
 		final Path filePath = Optional
-			.ofNullable(api.getBaseUrl())
-			.map(Path::new)
-			.orElseThrow(() -> new CollectorException("missing baseUrl"));
+				.ofNullable(api.getBaseUrl())
+				.map(Path::new)
+				.orElseThrow(() -> new CollectorException("missing baseUrl"));

-		// get the parameters for the connection to the OpenAIRE database.
-		// the database is used to obtain the list of the datasources that the plugin will collect
 		final String dbUrl = api.getParams().get("dbUrl");
 		final String dbUser = api.getParams().get("dbUser");
 		final String dbPassword = api.getParams().get("dbPassword");
-
-		// the types(comma separated, empty value for all) that the plugin will collect,
-		// the types should be expressed in the format of the normalized types of BASE (for example 1,121,...)
 		final String acceptedNormTypesString = api.getParams().get("acceptedNormTypes");

 		log.info("baseUrl: {}", filePath);
@ -70,9 +63,7 @@ public class BaseCollectorPlugin implements CollectorPlugin {
 		log.info("acceptedNormTypes: {}", acceptedNormTypesString);

 		try {
-			if (!this.fs.exists(filePath)) {
-				throw new CollectorException("path does not exist: " + filePath);
-			}
+			if (!this.fs.exists(filePath)) { throw new CollectorException("path does not exist: " + filePath); }
 		} catch (final Throwable e) {
 			throw new CollectorException(e);
 		}
@ -91,19 +82,19 @@ public class BaseCollectorPlugin implements CollectorPlugin {
 		final Iterator<String> iterator = new BaseCollectorIterator(this.fs, filePath, report);
 		final Spliterator<String> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
 		return StreamSupport
-			.stream(spliterator, false)
-			.filter(doc -> filterXml(doc, acceptedOpendoarIds, acceptedNormTypes));
+				.stream(spliterator, false)
+				.filter(doc -> filterXml(doc, acceptedOpendoarIds, acceptedNormTypes));
 	}

 	private Set<String> findAcceptedOpendoarIds(final String dbUrl, final String dbUser, final String dbPassword)
-		throws CollectorException {
+			throws CollectorException {
 		final Set<String> accepted = new HashSet<>();

 		try (final DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) {

 			final String sql = IOUtils
-				.toString(
-					getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql"));
+					.toString(BaseAnalyzerJob.class
+							.getResourceAsStream("/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql"));

 			dbClient.processResults(sql, row -> {
 				try {
@ -127,26 +118,20 @@ public class BaseCollectorPlugin implements CollectorPlugin {
 	}

 	protected static boolean filterXml(final String xml,
-		final Set<String> acceptedOpendoarIds,
-		final Set<String> acceptedNormTypes) {
+			final Set<String> acceptedOpendoarIds,
+			final Set<String> acceptedNormTypes) {
 		try {

 			final Document doc = DocumentHelper.parseText(xml);

 			final String id = doc.valueOf("//*[local-name()='collection']/@opendoar_id").trim();

-			if (StringUtils.isBlank(id) || !acceptedOpendoarIds.contains("opendoar____::" + id)) {
-				return false;
-			}
+			if (StringUtils.isBlank(id) || !acceptedOpendoarIds.contains("opendoar____::" + id)) { return false; }

-			if (acceptedNormTypes.isEmpty()) {
-				return true;
-			}
+			if (acceptedNormTypes.isEmpty()) { return true; }

 			for (final Object s : doc.selectNodes("//*[local-name()='typenorm']")) {
-				if (acceptedNormTypes.contains(((Node) s).getText().trim())) {
-					return true;
-				}
+				if (acceptedNormTypes.contains(((Node) s).getText().trim())) { return true; }
 			}

 			return false;
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseRecordInfo.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseRecordInfo.java
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/OpenDoarRepoStatus.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/OpenDoarRepoStatus.java
@ -0,0 +1,71 @@
+
+package eu.dnetlib.dhp.collection.plugin.base;
+
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Map;
+
+public class OpenDoarRepoStatus implements Serializable {
+
+	private static final long serialVersionUID = 4832658700366871160L;
+
+	private String id;
+
+	private String jurisdiction;
+
+	private boolean highCompliance = false;
+
+	private long baseCount = 0;
+
+	private long openaireCount = 0;
+
+	private Map<String, Long> aggregations = new HashMap<>();
+
+	public String getId() {
+		return this.id;
+	}
+
+	public void setId(final String id) {
+		this.id = id;
+	}
+
+	public String getJurisdiction() {
+		return this.jurisdiction;
+	}
+
+	public void setJurisdiction(final String jurisdiction) {
+		this.jurisdiction = jurisdiction;
+	}
+
+	public Map<String, Long> getAggregations() {
+		return this.aggregations;
+	}
+
+	public void setAggregations(final Map<String, Long> aggregations) {
+		this.aggregations = aggregations;
+	}
+
+	public boolean isHighCompliance() {
+		return this.highCompliance;
+	}
+
+	public void setHighCompliance(final boolean highCompliance) {
+		this.highCompliance = highCompliance;
+	}
+
+	public long getOpenaireCount() {
+		return this.openaireCount;
+	}
+
+	public void setOpenaireCount(final long openaireCount) {
+		this.openaireCount = openaireCount;
+	}
+
+	public long getBaseCount() {
+		return this.baseCount;
+	}
+
+	public void setBaseCount(final long baseCount) {
+		this.baseCount = baseCount;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/action_set_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/action_set_parameters.json
@ -0,0 +1,56 @@
+[
+	{
+		"paramName": "i",
+		"paramLongName": "inputPath",
+		"paramDescription": "the path of the BASE dump",
+		"paramRequired": true
+	},
+	{
+		"paramName": "d",
+		"paramLongName": "dataPath",
+		"paramDescription": "the path of the loaded records",
+		"paramRequired": true
+	},
+	{
+		"paramName": "o",
+		"paramLongName": "outputPath",
+		"paramDescription": "the path of the generated report",
+		"paramRequired": true
+	},
+	{
+		"paramName": "od",
+		"paramLongName": "opendoarPath",
+		"paramDescription": "the path of the generated OpenDOAR report",
+		"paramRequired": true
+	},
+	{
+		"paramName": "t",
+		"paramLongName": "typesReportPath",
+		"paramDescription": "the path of the generated types report",
+		"paramRequired": true
+	},
+	{
+		"paramName": "f",
+		"paramLongName": "fromStep",
+		"paramDescription": "the initial step (numeric, 0 for ALL STEPS)",
+		"paramRequired": true
+	},
+	{
+		"paramName": "pgurl",
+		"paramLongName": "postgresUrl",
+		"paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb",
+		"paramRequired": true
+	},
+	{
+		"paramName": "pguser",
+		"paramLongName": "postgresUser",
+		"paramDescription": "postgres user",
+		"paramRequired": false
+	},
+	{
+		"paramName": "pgpasswd",
+		"paramLongName": "postgresPassword",
+		"paramDescription": "postgres password",
+		"paramRequired": false
+	}
+]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/oozie_app/config-default.xml
@ -0,0 +1,58 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value>
+    </property>
+    <property>
+        <name>hive_metastore_uris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>spark2YarnHistoryServerAddress</name>
+        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
+    </property>
+    <property>
+        <name>spark2ExtraListeners</name>
+        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
+    </property>
+    <property>
+        <name>spark2SqlQueryExecutionListeners</name>
+        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
+    </property>
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>sparkExecutorNumber</name>
+        <value>4</value>
+    </property>
+    <property>
+        <name>spark2EventLogDir</name>
+        <value>/user/spark/spark2ApplicationHistory</value>
+    </property>
+    <property>
+        <name>sparkDriverMemory</name>
+        <value>15G</value>
+    </property>
+    <property>
+        <name>sparkExecutorMemory</name>
+        <value>10G</value>
+    </property>
+    <property>
+        <name>sparkExecutorCores</name>
+        <value>1</value>
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/oozie_app/workflow.xml
@ -0,0 +1,79 @@
+<workflow-app name="Analyze_BASE_Records" xmlns="uri:oozie:workflow:0.5">
+    <parameters>
+        <property>
+            <name>baseInputPath</name>
+            <description>the path of the BASE dump</description>
+        </property>
+        <property>
+            <name>baseDataPath</name>
+            <description>the path where to store BASE records</description>
+        </property>
+        <property>
+            <name>baseReportsPath</name>
+            <description>path where to store the reports</description>
+        </property>
+        <property>
+            <name>baseOpenDoarReportsPath</name>
+            <description>path where to store the OpenDOAR reports</description>
+        </property>
+        <property>
+			<name>baseTypesReportPath</name>
+			<description>path of the generated types report</description>
+        </property>
+        <property>
+            <name>postgresURL</name>
+            <description>the postgres URL used to access the database</description>
+        </property>
+        <property>
+            <name>postgresUser</name>
+            <description>the postgres user</description>
+        </property>
+        <property>
+            <name>postgresPassword</name>
+            <description>the postgres password</description>
+        </property> 
+        <property>
+            <name>baseFromStep</name>
+            <description>the initial step (numeric, 0 for ALL STEPS)</description>
+        </property>
+    </parameters>
+
+    <start to="analyzeBaseRecords"/>
+    
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+
+    <action name="analyzeBaseRecords">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>AnalyzeBaseRecords</name>
+            <class>eu.dnetlib.dhp.collection.plugin.base.BaseAnalyzerJob</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-cores=${sparkExecutorCores}
+                --executor-memory=${sparkExecutorMemory}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.shuffle.partitions=3840
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${baseInputPath}</arg>
+            <arg>--dataPath</arg><arg>${baseDataPath}</arg>
+            <arg>--outputPath</arg><arg>${baseReportsPath}</arg>
+            <arg>--opendoarPath</arg><arg>${baseOpenDoarReportsPath}</arg>
+            <arg>--typesReportPath</arg><arg>${baseTypesReportPath}</arg>     
+            <arg>--postgresUrl</arg><arg>${postgresURL}</arg>
+            <arg>--postgresUser</arg><arg>${postgresUser}</arg>
+            <arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
+            <arg>--fromStep</arg><arg>${baseFromStep}</arg>
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
+    <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/base.sql
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/base.sql
@ -99,16 +99,4 @@ INSERT INTO dsm_apiparams(
 	'***'
 );

-INSERT INTO dsm_apiparams(
-	_dnet_resource_identifier_, 
-	api, 
-	param, 
-	value
-) VALUES (
-	'api_________::openaire____::base_search::dump@@acceptedNormTypes',
-	'api_________::openaire____::base_search::dump',
-	'acceptedNormTypes',
-	'1,11,111,121,13,14,15,18,181,182,183,1A,6,7'
-);
-
 COMMIT;
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql
@ -2,8 +2,6 @@ select s.id as id
 from dsm_services s 
 where collectedfrom = 'openaire____::opendoar' 
 and jurisdiction = 'Institutional'
-and s.id in (
-	select service from dsm_api where coalesce(compatibility_override, compatibility) = 'driver' or coalesce(compatibility_override, compatibility) = 'UNKNOWN'
-) and s.id not in (
-	select service from dsm_api where coalesce(compatibility_override, compatibility) like '%openaire%'
-);
+and s.id not in (
+	select service from dsm_api where coalesce(compatibility_override, compatibility) like '%openaire%' or last_collection_total > 0
+);
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json
@ -1048,10 +1048,5 @@
  "openaire_id": "re3data_____::r3d100010399",
  "datacite_name": "ZEW Forschungsdatenzentrum",
  "official_name": "ZEW Forschungsdatenzentrum"
- },
- "HBP.NEUROINF": {
-  "openaire_id": "fairsharing_::2975",
-  "datacite_name": "EBRAINS",
-  "official_name": "EBRAINS"
 }
 }
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIteratorTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIteratorTest.java
@ -6,7 +6,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@ -22,7 +21,6 @@ import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
 import org.dom4j.Attribute;
 import org.dom4j.Document;
-import org.dom4j.DocumentException;
 import org.dom4j.DocumentHelper;
 import org.dom4j.Element;
 import org.dom4j.Node;
@ -119,7 +117,7 @@ public class BaseCollectorIteratorTest {
 		final List<BaseRecordInfo> ls = new ArrayList<>();

 		for (int i = 0; i < 10; i++) {
-			ls.add(extractInfo(xml));
+			ls.add(BaseAnalyzerJob.extractInfo(xml));
 		}

 		final JavaRDD<BaseRecordInfo> rdd = JavaSparkContext
@ -133,52 +131,4 @@ public class BaseCollectorIteratorTest {

 		df.show(false);
 	}
-
-	private BaseRecordInfo extractInfo(final String s) {
-		try {
-			final Document record = DocumentHelper.parseText(s);
-
-			final BaseRecordInfo info = new BaseRecordInfo();
-
-			final Set<String> paths = new LinkedHashSet<>();
-			final Set<String> types = new LinkedHashSet<>();
-			final List<BaseCollectionInfo> colls = new ArrayList<>();
-
-			for (final Object o : record.selectNodes("//*|//@*")) {
-				paths.add(((Node) o).getPath());
-
-				if (o instanceof Element) {
-					final Element n = (Element) o;
-
-					final String nodeName = n.getName();
-
-					if ("collection".equals(nodeName)) {
-						final String collName = n.getText().trim();
-
-						if (StringUtils.isNotBlank(collName)) {
-							final BaseCollectionInfo coll = new BaseCollectionInfo();
-							coll.setId(collName);
-							coll.setOpendoarId(n.valueOf("@opendoar_id").trim());
-							coll.setRorId(n.valueOf("@ror_id").trim());
-							colls.add(coll);
-						}
-					} else if ("type".equals(nodeName)) {
-						types.add("TYPE: " + n.getText().trim());
-					} else if ("typenorm".equals(nodeName)) {
-						types.add("TYPE_NORM: " + n.getText().trim());
-					}
-				}
-			}
-
-			info.setId(record.valueOf("//*[local-name() = 'header']/*[local-name() = 'identifier']").trim());
-			info.getTypes().addAll(types);
-			info.getPaths().addAll(paths);
-			info.setCollections(colls);
-
-			return info;
-		} catch (final DocumentException e) {
-			throw new RuntimeException(e);
-		}
-	}
-
 }
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPluginTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPluginTest.java
@ -1,4 +1,3 @@
-
 package eu.dnetlib.dhp.collection.plugin.base;

 import static org.junit.jupiter.api.Assertions.assertFalse;
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseTransfomationTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseTransfomationTest.java
@ -1,4 +1,3 @@
-
 package eu.dnetlib.dhp.collection.plugin.base;

 import java.io.IOException;
@ -66,9 +65,9 @@ public class BaseTransfomationTest extends AbstractVocabularyTest {

 	private XSLTTransformationFunction loadTransformationRule(final String path) throws Exception {
 		final String xslt = new SAXReader()
-			.read(this.getClass().getResourceAsStream(path))
-			.selectSingleNode("//CODE/*")
-			.asXML();
+				.read(this.getClass().getResourceAsStream(path))
+				.selectSingleNode("//CODE/*")
+				.asXML();

 		final LongAccumulator la = new LongAccumulator();

--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java
@ -122,41 +122,22 @@ public class DedupRecordFactory {
 				}

 				return Stream
-					.concat(
-						Stream
-							.of(agg.getDedupId())
-							.map(id -> createDedupOafEntity(id, agg.entity, dataInfo, ts)),
-						agg.aliases
-							.stream()
-							.map(id -> createMergedDedupAliasOafEntity(id, agg.entity, dataInfo, ts)))
+					.concat(Stream.of(agg.getDedupId()), agg.aliases.stream())
+					.map(id -> {
+						try {
+							OafEntity res = (OafEntity) BeanUtils.cloneBean(agg.entity);
+							res.setId(id);
+							res.setDataInfo(dataInfo);
+							res.setLastupdatetimestamp(ts);
+							return res;
+						} catch (Exception e) {
+							throw new RuntimeException(e);
+						}
+					})
 					.iterator();
 			}, beanEncoder);
 	}

-	private static OafEntity createDedupOafEntity(String id, OafEntity base, DataInfo dataInfo, long ts) {
-		try {
-			OafEntity res = (OafEntity) BeanUtils.cloneBean(base);
-			res.setId(id);
-			res.setDataInfo(dataInfo);
-			res.setLastupdatetimestamp(ts);
-			return res;
-		} catch (Exception e) {
-			throw new RuntimeException(e);
-		}
-	}
-
-	private static OafEntity createMergedDedupAliasOafEntity(String id, OafEntity base, DataInfo dataInfo, long ts) {
-		try {
-			OafEntity res = createDedupOafEntity(id, base, dataInfo, ts);
-			DataInfo ds = (DataInfo) BeanUtils.cloneBean(dataInfo);
-			ds.setDeletedbyinference(true);
-			res.setDataInfo(ds);
-			return res;
-		} catch (Exception e) {
-			throw new RuntimeException(e);
-		}
-	}
-
 	private static OafEntity reduceEntity(OafEntity entity, OafEntity duplicate) {

 		if (duplicate == null) {
--- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/config-default.xml
@ -15,12 +15,4 @@
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
-    <property>
-        <name>hiveMetastoreUris</name>
-        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
-    </property>
-    <property>
-        <name>pivotHistoryDatabase</name>
-        <value>&#x200B;</value>
-    </property>
 </configuration>
--- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml
@ -198,8 +198,6 @@
            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
            <arg>--actionSetId</arg><arg>${actionSetId}</arg>
            <arg>--cutConnectedComponent</arg><arg>${cutConnectedComponent}</arg>
-            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
-            <arg>--pivotHistoryDatabase</arg><arg>${pivotHistoryDatabase}</arg>
        </spark>
        <ok to="PrepareOrgRels"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json
@ -73,6 +73,12 @@
    "name": "Irish Nephrology Society",
    "synonym": []
  },
+  {
+    "id": "100011062",
+    "uri": "http://dx.doi.org/10.13039/100011062",
+    "name": "Asian Spinal Cord Network",
+    "synonym": []
+  },
  {
    "id": "100011096",
    "uri": "http://dx.doi.org/10.13039/100011096",
@ -217,6 +223,12 @@
    "name": "Global Brain Health Institute",
    "synonym": []
  },
+  {
+    "id": "100015776",
+    "uri": "http://dx.doi.org/10.13039/100015776",
+    "name": "Health and Social Care Board",
+    "synonym": []
+  },
  {
    "id": "100015992",
    "uri": "http://dx.doi.org/10.13039/100015992",
@ -391,6 +403,18 @@
    "name": "Irish Hospice Foundation",
    "synonym": []
  },
+  {
+    "id": "501100001596",
+    "uri": "http://dx.doi.org/10.13039/501100001596",
+    "name": "Irish Research Council for Science, Engineering and Technology",
+    "synonym": []
+  },
+  {
+    "id": "501100001597",
+    "uri": "http://dx.doi.org/10.13039/501100001597",
+    "name": "Irish Research Council for the Humanities and Social Sciences",
+    "synonym": []
+  },
  {
    "id": "501100001598",
    "uri": "http://dx.doi.org/10.13039/501100001598",
@ -491,7 +515,7 @@
    "id": "501100002081",
    "uri": "http://dx.doi.org/10.13039/501100002081",
    "name": "Irish Research Council",
-    "synonym": ["501100001596", "501100001597"]
+    "synonym": []
  },
  {
    "id": "501100002736",
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
@ -587,15 +587,7 @@ case object Crossref2Oaf {
                "10.13039/501100000266" | "10.13039/501100006041" | "10.13039/501100000265" | "10.13039/501100000270" |
                "10.13039/501100013589" | "10.13039/501100000271" =>
              generateSimpleRelationFromAward(funder, "ukri________", a => a)
-            //HFRI
-            case "10.13039/501100013209" =>
-              generateSimpleRelationFromAward(funder, "hfri________", a => a)
-              val targetId = getProjectId("hfri________", "1e5e62235d094afd01cd56e65112fc63")
-              queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
-              queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
-            //ERASMUS+
-            case "10.13039/501100010790" =>
-              generateSimpleRelationFromAward(funder, "erasmusplus_", a => a)
+
            case _ => logger.debug("no match for " + funder.DOI.get)

          }
--- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala
@ -23,10 +23,15 @@ class CrossrefMappingTest {
  val mapper = new ObjectMapper()

  @Test
-  def testMissingAuthorParser():Unit = {
-    val json: String = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json")).mkString
+  def testMissingAuthorParser(): Unit = {
+    val json: String = Source
+      .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json"))
+      .mkString
    val result = Crossref2Oaf.convert(json)
-    result.filter(o => o.isInstanceOf[Publication]).map(p=> p.asInstanceOf[Publication]).foreach(p =>assertTrue(p.getAuthor.size()>0))
+    result
+      .filter(o => o.isInstanceOf[Publication])
+      .map(p => p.asInstanceOf[Publication])
+      .foreach(p => assertTrue(p.getAuthor.size() > 0))
  }

  @Test
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java
@ -53,8 +53,6 @@ public class Constraints implements Serializable {

 		for (Constraint sc : constraint) {
 			boolean verified = false;
-			if(!param.containsKey(sc.getField()))
-				return false;
 			for (String value : param.get(sc.getField())) {
 				if (sc.verifyCriteria(value.trim())) {
 					verified = true;
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
@ -317,7 +317,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
 					listKeyValues(
 						createOpenaireId(10, rs.getString("collectedfromid"), true),
 						rs.getString("collectedfromname")));
-			p.setPid(prepareListOfStructProps(rs.getArray("pid"), info));
+			p.setPid(new ArrayList<>());
 			p.setDateofcollection(asString(rs.getDate("dateofcollection")));
 			p.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
 			p.setExtraInfo(new ArrayList<>()); // Values not present in the DB
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
@ -238,23 +238,11 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 				(Element) doc
 					.selectSingleNode(
 						"//*[local-name()='metadata']/*[local-name() = 'resource']/*[local-name() = 'resourceType']"))
-			.map(e -> {
-				final String resourceTypeURI = Optional
-					.ofNullable(e.attributeValue("uri"))
-					.filter(StringUtils::isNotBlank)
-					.orElse(null);
-				final String resourceTypeAnyURI = Optional
-					.ofNullable(e.attributeValue("anyURI"))
-					.filter(StringUtils::isNotBlank)
-					.orElse(null);
-				final String resourceTypeTxt = Optional
-					.ofNullable(e.getText())
-					.filter(StringUtils::isNotBlank)
-					.orElse(null);
-				final String resourceTypeGeneral = Optional
-					.ofNullable(e.attributeValue("resourceTypeGeneral"))
-					.filter(StringUtils::isNotBlank)
-					.orElse(null);
+			.map(element -> {
+				final String resourceTypeURI = element.attributeValue("uri");
+				final String resourceTypeAnyURI = element.attributeValue("anyURI");
+				final String resourceTypeTxt = element.getText();
+				final String resourceTypeGeneral = element.attributeValue("resourceTypeGeneral");

 				return ObjectUtils
 					.firstNonNull(resourceTypeURI, resourceTypeAnyURI, resourceTypeTxt, resourceTypeGeneral);
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects.sql
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects.sql
@ -33,7 +33,7 @@ SELECT
                dc.officialname                                                                                            AS collectedfromname,
                p.contracttype || '@@@' || p.contracttypescheme                                                            AS contracttype,
                p.provenanceactionclass || '@@@' || p.provenanceactionscheme                                             AS provenanceaction,
-                array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype || '@@@' || i.issuertype), NULL)            AS pid,
+                array_agg(DISTINCT i.pid || '###' || i.issuertype)                                                                  AS pid,
                array_agg(DISTINCT s.name || '###' || s.semanticclass || '@@@' || s.semanticscheme)          AS subjects,
                array_agg(DISTINCT fp.path)                                                                                         AS fundingtree

--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects_production.sql
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects_production.sql
@ -33,7 +33,7 @@ SELECT
                dc.officialname                                                                                            AS collectedfromname,
                p.contracttypeclass || '@@@' || p.contracttypescheme                                                       AS contracttype,
                p.provenanceactionclass || '@@@' || p.provenanceactionscheme                                               AS provenanceaction,
-                array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype || '@@@' || i.issuertype), NULL)            AS pid,
+                array_agg(DISTINCT i.pid || '###' || i.issuertype)                                                         AS pid,
                array_agg(DISTINCT s.name || '###' || s.semanticclass || '@@@' || s.semanticscheme) AS subjects,
                array_agg(DISTINCT fp.path)                                                                                AS fundingtree
        FROM projects p
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala
@ -93,8 +93,8 @@ object CopyHdfsOafSparkApplication {
      hasSource != null && hasTarget != null
    } else {
      val hasId = (json \ "id").extractOrElse[String](null)
-      val resultType = (json \ "resulttype" \ "classid").extractOrElse[String]("")
-      hasId != null && oafType.startsWith(resultType)
+      val resultType = (json \ "resulttype" \ "classid").extractOrElse[String](null)
+      hasId != null && oafType.equalsIgnoreCase(resultType)
    }

  }
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java
@ -59,19 +59,7 @@ public class CopyHdfsOafSparkApplicationTest {
 								.getResourceAsStream(
 									"/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json")),
 					"publication"));
-	}

-	@Test
-	void isOafType_Datacite_ORP() throws IOException {
-		assertTrue(
-				CopyHdfsOafSparkApplication
-						.isOafType(
-								IOUtils
-										.toString(
-												getClass()
-														.getResourceAsStream(
-																"/eu/dnetlib/dhp/oa/graph/raw/datacite_orp.json")),
-								"otherresearchproduct"));
 	}

 }
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
@ -1171,34 +1171,6 @@ class MappersTest {

 	}

-	/**
-	 * Maps the {@code odf_zenodo2.xml} fixture through {@link OdfToOafMapper} and verifies the instance type
-	 * mapping of the resulting publication.
-	 *
-	 * The mapper is expected to emit three {@link Oaf} records, the first being a {@link Publication} with exactly
-	 * one instance carrying exactly one instance type mapping. The mapping bound to the
-	 * {@code OPENAIRE_COAR_RESOURCE_TYPES_3_1} vocabulary must be present and keep the original type, while its
-	 * type code and type label are expected to be {@code null}.
-	 *
-	 * @throws IOException if the XML fixture cannot be read
-	 */
-	@Test
-	void test_Zenodo2() throws IOException {
-		final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_zenodo2.xml")));
-		final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
-
-		assertEquals(3, list.size());
-		Publication p = cleanup((Publication) list.get(0), vocs);
-
-		assertNotNull(p.getInstance());
-		assertEquals(1, p.getInstance().size());
-
-		final Instance instance = p.getInstance().get(0);
-
-		assertNotNull(instance.getInstanceTypeMapping());
-		assertEquals(1, instance.getInstanceTypeMapping().size());
-
-		// Select the mapping that refers to the COAR resource types vocabulary.
-		Optional<InstanceTypeMapping> coarType = instance
-			.getInstanceTypeMapping()
-			.stream()
-			.filter(itm -> ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName()))
-			.findFirst();
-
-		assertTrue(coarType.isPresent());
-		assertNotNull(coarType.get().getOriginalType());
-		// The original type is kept, but no code/label is expected for this record.
-		assertNull(coarType.get().getTypeCode());
-		assertNull(coarType.get().getTypeLabel());
-	}
-
 	@Test
 	void testROHub2() throws IOException {
 		final String xml = IOUtils
@ -1257,7 +1229,7 @@ class MappersTest {
 	}

 	@Test
-	void testD4ScienceTraining() throws IOException {
+	public void testD4ScienceTraining() throws IOException {
 		final String xml = IOUtils
 			.toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-1-training.xml")));
 		final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
@ -1268,7 +1240,7 @@ class MappersTest {
 	}

 	@Test
-	void testD4ScienceDataset() throws IOException {
+	public void testD4ScienceDataset() throws IOException {
 		final String xml = IOUtils
 			.toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-2-dataset.xml")));
 		final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datacite_orp.json
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datacite_orp.json
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml
@ -1,59 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<record xmlns:oaf="http://namespace.openaire.eu/oaf"
-        xmlns:oai="http://www.openarchives.org/OAI/2.0/"
-        xmlns:datacite="http://datacite.org/schema/kernel-3"
-        xmlns:dr="http://www.driver-repository.eu/namespace/dr"
-        xmlns:dri="http://www.driver-repository.eu/namespace/dri">
-    <header xmlns="http://www.openarchives.org/OAI/2.0/">
-        <identifier>oai:zenodo.org:1596086</identifier>
-        <datestamp>2020-01-20T13:50:28Z</datestamp>
-        <setSpec>openaire</setSpec>
-        <dr:dateOfTransformation>2024-02-08T11:03:10.994Z</dr:dateOfTransformation>
-        <dri:objIdentifier>od______2659::036d5555a6688ed00c8d0da97bdece3b</dri:objIdentifier>
-        <dri:dateOfCollection>2024-02-08T11:03:10.994Z</dri:dateOfCollection>
-        <dri:dateOfTransformation>2024-02-08T11:03:10.994Z</dri:dateOfTransformation>
-    </header>
-    <metadata>
-        <resource xmlns="http://datacite.org/schema/kernel-4"
-                  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-                  xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.1/metadata.xsd">
-            <identifier identifierType="URL">https://zenodo.org/record/1596086</identifier>
-            <alternateIdentifiers xmlns="http://datacite.org/schema/kernel-3"/>
-            <creators>
-                <creator>
-                    <creatorName>Bonney, T. G.</creatorName>
-                    <givenName>T. G.</givenName>
-                    <familyName>Bonney</familyName>
-                </creator>
-            </creators>
-            <titles>
-                <title>Ice Blocks on a Moraine</title>
-            </titles>
-            <publisher>Zenodo</publisher>
-            <publicationYear>1889</publicationYear>
-            <dates>
-                <date dateType="Issued">1889-08-22</date>
-            </dates>
-            <resourceType resourceTypeGeneral="JournalArticle"/>
-            <relatedIdentifiers>
-                <relatedIdentifier relatedIdentifierType="DOI" relationType="IsIdenticalTo"
-                >10.1038/040391a0</relatedIdentifier>
-            </relatedIdentifiers>
-            <rightsList>
-                <rights rightsURI="https://creativecommons.org/publicdomain/zero/1.0/legalcode"
-                >Creative Commons Zero v1.0 Universal</rights>
-                <rights rightsURI="info:eu-repo/semantics/openAccess">Open Access</rights>
-            </rightsList>
-            <descriptions>
-                <description descriptionType="Abstract">n/a</description>
-            </descriptions>
-        </resource>
-        <dr:CobjCategory type="publication">0001</dr:CobjCategory>
-        <oaf:dateAccepted>1889-08-22</oaf:dateAccepted>
-        <oaf:accessrights>OPEN</oaf:accessrights>
-        <oaf:license>http://creativecommons.org/publicdomain/zero/1.0/legalcode</oaf:license>
-        <oaf:language/>
-        <oaf:hostedBy name="ZENODO" id="opendoar____::2659"/>
-        <oaf:collectedFrom name="ZENODO" id="opendoar____::2659"/>
-    </metadata>
-</record>
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml
@ -185,7 +185,6 @@
                --executor-cores=${sparkExecutorCoresForJoining}
                --executor-memory=${sparkExecutorMemoryForJoining}
                --driver-memory=${sparkDriverMemoryForJoining}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -213,7 +212,6 @@
                --executor-cores=${sparkExecutorCoresForJoining}
                --executor-memory=${sparkExecutorMemoryForJoining}
                --driver-memory=${sparkDriverMemoryForJoining}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -241,7 +239,6 @@
                --executor-cores=${sparkExecutorCoresForJoining}
                --executor-memory=${sparkExecutorMemoryForJoining}
                --driver-memory=${sparkDriverMemoryForJoining}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -269,7 +266,6 @@
                --executor-cores=${sparkExecutorCoresForJoining}
                --executor-memory=${sparkExecutorMemoryForJoining}
                --driver-memory=${sparkDriverMemoryForJoining}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -297,7 +293,6 @@
                --executor-cores=${sparkExecutorCoresForJoining}
                --executor-memory=${sparkExecutorMemoryForJoining}
                --driver-memory=${sparkDriverMemoryForJoining}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -325,7 +320,6 @@
                --executor-cores=${sparkExecutorCoresForJoining}
                --executor-memory=${sparkExecutorMemoryForJoining}
                --driver-memory=${sparkDriverMemoryForJoining}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -353,7 +347,6 @@
                --executor-cores=${sparkExecutorCoresForJoining}
                --executor-memory=${sparkExecutorMemoryForJoining}
                --driver-memory=${sparkDriverMemoryForJoining}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -393,7 +386,6 @@
                --executor-cores=${sparkExecutorCoresForJoining}
                --executor-memory=${sparkExecutorMemoryForJoining}
                --driver-memory=${sparkDriverMemoryForJoining}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -422,7 +414,6 @@
                --executor-cores=${sparkExecutorCoresForJoining}
                --executor-memory=${sparkExecutorMemoryForJoining}
                --driver-memory=${sparkDriverMemoryForJoining}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -451,7 +442,6 @@
                --executor-cores=${sparkExecutorCoresForJoining}
                --executor-memory=${sparkExecutorMemoryForJoining}
                --driver-memory=${sparkDriverMemoryForJoining}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -480,7 +470,6 @@
                --executor-cores=${sparkExecutorCoresForJoining}
                --executor-memory=${sparkExecutorMemoryForJoining}
                --driver-memory=${sparkDriverMemoryForJoining}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -509,7 +498,6 @@
                --executor-cores=${sparkExecutorCoresForJoining}
                --executor-memory=${sparkExecutorMemoryForJoining}
                --driver-memory=${sparkDriverMemoryForJoining}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -538,7 +526,6 @@
                --executor-cores=${sparkExecutorCoresForJoining}
                --executor-memory=${sparkExecutorMemoryForJoining}
                --driver-memory=${sparkDriverMemoryForJoining}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@ -567,7 +554,6 @@
                --executor-cores=${sparkExecutorCoresForJoining}
                --executor-memory=${sparkExecutorMemoryForJoining}
                --driver-memory=${sparkDriverMemoryForJoining}
-                --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--- a/dhp-workflows/dhp-stats-hist-snaps/pom.xml
+++ b/dhp-workflows/dhp-stats-hist-snaps/pom.xml
@ -1,32 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <parent>
-        <artifactId>dhp-workflows</artifactId>
-        <groupId>eu.dnetlib.dhp</groupId>
-        <version>1.2.5-SNAPSHOT</version>
-    </parent>
-    <modelVersion>4.0.0</modelVersion>
-    <artifactId>dhp-stats-hist-snaps</artifactId>
-    <dependencies>
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-core_2.11</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-sql_2.11</artifactId>
-        </dependency>
-    </dependencies>
-	<build>
-		<plugins>
-			<plugin>
-				<groupId>pl.project13.maven</groupId>
-				<artifactId>git-commit-id-plugin</artifactId>
-                <version>2.1.11</version>
-				<configuration>
-					<failOnNoGitDirectory>false</failOnNoGitDirectory>
-				</configuration>
-			</plugin>
-		</plugins>
-	</build>
-</project>
--- a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/config-default.xml
@ -1,30 +0,0 @@
-<configuration>
-    <property>
-        <name>jobTracker</name>
-        <value>${jobTracker}</value>
-    </property>
-    <property>
-        <name>nameNode</name>
-        <value>${nameNode}</value>
-    </property>
-    <property>
-        <name>oozie.use.system.libpath</name>
-        <value>true</value>
-    </property>
-    <property>
-        <name>oozie.action.sharelib.for.spark</name>
-        <value>spark2</value>
-    </property>
-    <property>
-        <name>hive_metastore_uris</name>
-        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
-    </property>
-    <property>
-        <name>hive_jdbc_url</name>
-        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=22166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=15596411699;spark.yarn.driver.memoryOverhead=1228</value>
-    </property>
-	<property>
-		<name>oozie.wf.workflow.notification.url</name>
-		<value>{serviceUrl}/v1/oozieNotification/jobUpdate?jobId=$jobId%26status=$status</value>
-	</property>
-</configuration>
--- a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh
@ -1,223 +0,0 @@
-# Point the Python egg cache to a per-user folder under the home directory and make sure
-# that the per-user folder under /tmp is a symbolic link into that cache
-# (anything that is not a symlink at that path is removed first).
-export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
-export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
-if ! [ -L $link_folder ]
-then
-    rm -Rf "$link_folder"
-    ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
-fi
-
-# The second script argument is the user that the hadoop/impala commands run as.
-export HADOOP_USER_NAME=$2
-
-
-# Set the active HDFS node of OCEAN and IMPALA cluster.
-OCEAN_HDFS_NODE='hdfs://nameservice1'
-echo -e "\nOCEAN HDFS virtual-name which resolves automatically to the active-node: ${OCEAN_HDFS_NODE}"
-
-# Probe the two Impala name-nodes (mn1 first, then mn2) by testing for the existence of "/tmp" on each.
-# The first node that answers is used; up to 3 rounds are attempted, with a 1-second pause between rounds.
-IMPALA_HDFS_NODE=''
-COUNTER=0
-while [ $COUNTER -lt 3 ]; do
-  if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu/tmp >/dev/null 2>&1; then
-      IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020'
-      break
-  elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu/tmp >/dev/null 2>&1; then
-      IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020'
-      break
-  else
-      IMPALA_HDFS_NODE=''
-      sleep 1
-  fi
-  ((COUNTER++))
-done
-# Abort the whole script when no Impala name-node could be reached.
-if [ -z "$IMPALA_HDFS_NODE" ]; then
-    echo -e "\n\nERROR: PROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER! | AFTER ${COUNTER} RETRIES.\n\n"
-    exit 1
-fi
-echo -e "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE} , after ${COUNTER} retries.\n\n"
-
-IMPALA_HOSTNAME='impala-cluster-dn1.openaire.eu'
-IMPALA_CONFIG_FILE='/etc/impala_cluster/hdfs-site.xml'
-
-IMPALA_HDFS_DB_BASE_PATH="${IMPALA_HDFS_NODE}/user/hive/warehouse"
-
-
-# Set sed arguments.
-LOCATION_HDFS_NODE_SED_ARG="s|${OCEAN_HDFS_NODE}|${IMPALA_HDFS_NODE}|g" # This requires to be used with "sed -e" in order to have the "|" delimiter (as the "/" conflicts with the URIs)
-
-# Set the SED command arguments for column-names with reserved words:
-# For each reserved word there are three patterns: the word surrounded by whitespace (_1),
-# the word after a dot and before a comma (_2), and the word after a dot and before whitespace (_3).
-# NOTE(review): the "_1" patterns consume the whitespace around the word and the replacement does not
-# put it back; they also start with "\d", "\h" and "\l", whose meaning depends on the sed
-# implementation -- confirm that these expressions match as intended.
-DATE_SED_ARG_1='s/[[:space:]]\date[[:space:]]/\`date\`/g'
-DATE_SED_ARG_2='s/\.date,/\.\`date\`,/g'  # the "date" may be part of a larger field name like "datestamp" or "date_aggregated", so we need to be careful with what we are replacing.
-DATE_SED_ARG_3='s/\.date[[:space:]]/\.\`date\` /g'
-
-HASH_SED_ARG_1='s/[[:space:]]\hash[[:space:]]/\`hash\`/g'
-HASH_SED_ARG_2='s/\.hash,/\.\`hash\`,/g'
-HASH_SED_ARG_3='s/\.hash[[:space:]]/\.\`hash\` /g'
-
-LOCATION_SED_ARG_1='s/[[:space:]]\location[[:space:]]/\`location\`/g'
-LOCATION_SED_ARG_2='s/\.location,/\.\`location\`,/g'
-LOCATION_SED_ARG_3='s/\.location[[:space:]]/\.\`location\` /g'
-
-
-function copydb() {
-  db=$1
-  echo -e "\nStart processing db: '${db}'..\n"
-
-  # Delete the old DB from Impala cluster (if exists).
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
-  log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
-  if [ -n "$log_errors" ]; then
-    echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN DROPPING THE OLD DATABASE! EXITING...\n\n"
-    rm -f error.log
-    return 1
-  fi
-
-  # Make Impala aware of the deletion of the old DB immediately.
-  sleep 1
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-
-  echo -e "\n\nCopying files of '${db}', from Ocean to Impala cluster..\n"
-  # Using max-bandwidth of: 50 * 100 Mb/s = 5 Gb/s
-  # Using max memory of: 50 * 6144 = 300 Gb
-  # Using 1MB as a buffer-size.
-  # The " -Ddistcp.dynamic.recordsPerChunk=50" arg is not available in our version of hadoop
-  # The "ug" args cannot be used as we get a "User does not belong to hive" error.
-  # The "p" argument cannot be used, as it blocks the files from being used, giving a "sticky bit"-error, even after applying chmod and chown onm the files.
-  hadoop distcp -Dmapreduce.map.memory.mb=6144 -m 70 -bandwidth 150 \
-                -numListstatusThreads 40 \
-                -copybuffersize 1048576 \
-                -strategy dynamic \
-                -pb \
-                ${OCEAN_HDFS_NODE}/user/hive/warehouse/${db}.db ${IMPALA_HDFS_DB_BASE_PATH}
-
-  # Check the exit status of the "hadoop distcp" command.
-  if [ $? -eq 0 ]; then
-    echo -e "\nSuccessfully copied the files of '${db}'.\n"
-  else
-    echo -e "\n\nERROR: FAILED TO TRANSFER THE FILES OF '${db}', WITH 'hadoop distcp'. GOT WITH EXIT STATUS: $?\n\n"
-    rm -f error.log
-    return 2
-  fi
-
-  # In case we ever use this script for a writable DB (using inserts/updates), we should perform the following costly operation as well..
-  #hdfs dfs -conf ${IMPALA_CONFIG_FILE} -chmod -R 777 ${TEMP_SUBDIR_FULLPATH}/${db}.db
-
-  echo -e "\nCreating schema for db: '${db}'\n"
-
-  # create the new database (with the same name)
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create database ${db}"
-
-  # Make Impala aware of the creation of the new DB immediately.
-  sleep 1
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-  sleep 1
-  # Because "Hive" and "Impala" do not have compatible schemas, we cannot use the "show create table <name>" output from hive to create the exact same table in impala.
-  # So, we have to find at least one parquet file (check if it's there) from the table in the ocean cluster for impala to use it to extract the table-schema itself from that file.
-
-  all_create_view_statements=()
-
-  entities_on_ocean=`hive -e "show tables in ${db};" | sed 's/WARN:.*//g'`  # Get the tables and views without any potential the "WARN" logs.
-  for i in ${entities_on_ocean[@]}; do # Use un-quoted values, as the elemetns are single-words.
-    # Check if this is a view by showing the create-statement where it should print "create view" for a view, not the "create table". Unfortunately, there is no "show views" command.
-    create_entity_statement=`hive -e "show create table ${db}.${i};"` # It needs to happen in two stages, otherwise the "grep" is not able to match multi-line statement.
-
-    create_view_statement_test=`echo -e "$create_entity_statement" | grep 'CREATE VIEW'`
-    if [ -n "$create_view_statement_test" ]; then
-      echo -e "\n'${i}' is a view, so we will save its 'create view' statement and execute it on Impala, after all tables have been created.\n"
-      create_view_statement=`echo -e "$create_entity_statement" | sed 's/WARN:.*//g' | sed 's/\`//g' \
-        | sed 's/"$/;/' | sed 's/^"//' | sed 's/\\"\\"/\"/g' | sed -e "${LOCATION_HDFS_NODE_SED_ARG}" | sed "${DATE_SED_ARG_1}" | sed "${HASH_SED_ARG_1}" | sed "${LOCATION_SED_ARG_1}" \
-        | sed "${DATE_SED_ARG_2}" | sed "${HASH_SED_ARG_2}" | sed "${LOCATION_SED_ARG_2}" \
-        | sed "${DATE_SED_ARG_3}" | sed "${HASH_SED_ARG_3}" | sed "${LOCATION_SED_ARG_3}"`
-      all_create_view_statements+=("$create_view_statement")
-    else
-      echo -e "\n'${i}' is a table, so we will check for its parquet files and create the table on Impala cluster.\n"
-      CURRENT_PRQ_FILE=`hdfs dfs -conf ${IMPALA_CONFIG_FILE} -ls -C "${IMPALA_HDFS_DB_BASE_PATH}/${db}.db/${i}/" | grep -v 'Found' | grep -v '_impala_insert_staging' |  head -1`
-      if [ -z "$CURRENT_PRQ_FILE" ]; then # If there is not parquet-file inside.
-          echo -e "\nERROR: THE TABLE \"${i}\" HAD NO FILES TO GET THE SCHEMA FROM! IT'S EMPTY!\n\n"
-      else
-        impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
-        log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
-        if [ -n "$log_errors" ]; then
-          echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN CREATING TABLE '${i}'!\n\n"
-        fi
-      fi
-    fi
-  done
-
-  echo -e "\nAll tables have been created, going to create the views..\n"
-
-  # Time to loop through the views and create them.
-  # At this point all table-schemas should have been created.
-
-  previous_num_of_views_to_retry=${#all_create_view_statements}
-  if [[ $previous_num_of_views_to_retry -gt 0 ]]; then
-    echo -e "\nAll_create_view_statements:\n\n${all_create_view_statements[@]}\n"  # DEBUG
-    # Make Impala aware of the new tables, so it knows them when creating the views.
-    sleep 1
-    impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-    sleep 1
-  else
-    echo -e "\nDB '${db}' does not contain any views.\n"
-  fi
-
-  level_counter=0
-  while [[ ${#all_create_view_statements[@]} -gt 0 ]]; do
-    ((level_counter++))
-    # The only accepted reason for a view to not be created, is if it depends on another view, which has not been created yet.
-    # In this case, we should retry creating this particular view again.
-    should_retry_create_view_statements=()
-
-    for create_view_statement in "${all_create_view_statements[@]}"; do # Here we use double quotes, as the elements are phrases, instead of single-words.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
-      specific_errors=`cat error.log | grep -E "FAILED: ParseException line 1:13 missing TABLE at 'view'|ERROR: AnalysisException: Could not resolve table reference:"`
-      if [ -n "$specific_errors" ]; then
-        echo -e "\nspecific_errors: ${specific_errors}\n"
-        echo -e "\nView '$(cat error.log | grep "CREATE VIEW " | sed 's/CREATE VIEW //g' | sed 's/ as select .*//g')' failed to be created, possibly because it depends on another view.\n"
-        should_retry_create_view_statements+=("$create_view_statement")
-      else
-          sleep 1 # Wait a bit for Impala to register that the view was created, before possibly referencing it by another view.
-      fi
-    done
-
-    new_num_of_views_to_retry=${#should_retry_create_view_statements}
-    if [[ $new_num_of_views_to_retry -eq $previous_num_of_views_to_retry ]]; then
-      echo -e "\n\nERROR: THE NUMBER OF VIEWS TO RETRY HAS NOT BEEN REDUCED! THE SCRIPT IS LIKELY GOING TO AN INFINITE-LOOP! EXITING..\n\n"
-      return 3
-    elif [[ $new_num_of_views_to_retry -gt 0 ]]; then
-      echo -e "\nTo be retried \"create_view_statements\":\n\n${should_retry_create_view_statements[@]}\n"
-      previous_num_of_views_to_retry=$new_num_of_views_to_retry
-    else
-      echo -e "\nFinished creating views, for db: '${db}', in level-${level_counter}.\n"
-    fi
-    all_create_view_statements=("${should_retry_create_view_statement[@]}") # This is needed in any case to either move forward with the rest of the views or stop at 0 remaining views.
-  done
-
-  sleep 1
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-  sleep 1
-
-  echo -e "\nComputing stats for tables..\n"
-  entities_on_impala=`impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`
-  for i in ${entities_on_impala[@]}; do # Use un-quoted values, as the elemetns are single-words.
-    # Taking the create table statement from the Ocean cluster, just to check if its a view, as the output is easier than using impala-shell from Impala cluster.
-    create_view_statement=`hive -e "show create table ${db}.${i};" | grep "CREATE VIEW"`  # This grep works here, as we do not want to match multiple-lines.
-    if [ -z "$create_view_statement" ]; then  # If it's a table, then go load the data to it.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}";
-    fi
-  done
-
-  if [ "${entities_on_impala[@]}" == "${entities_on_ocean[@]}" ]; then
-    echo -e "\nAll entities have been copied to Impala cluster.\n"
-  else
-    echo -e "\n\nERROR: 1 OR MORE ENTITIES OF DB '${db}' FAILED TO BE COPIED TO IMPALA CLUSTER!\n\n"
-    rm -f error.log
-    return 4
-  fi
-
-  rm -f error.log
-  echo -e "\n\nFinished processing db: ${db}\n\n"
-}
-
-
-MONITOR_DB=$1
-#HADOOP_USER_NAME=$2
-copydb $MONITOR_DB
-
--- a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/finalizeImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/finalizeImpalaCluster.sh
@ -1,41 +0,0 @@
-export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
-export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
-if ! [ -L $link_folder ]
-then
-    rm -Rf "$link_folder"
-    ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
-fi
-
-SOURCE=$1
-PRODUCTION=$2
-SHADOW=$3
-MONITOR_PROD=$4
-MONITOR_IRISH_PROD=$5
-
-
-echo ${SOURCE}
-echo ${PRODUCTION}
-
-#echo "Updating ${PRODUCTION} monitor database old cluster"
-#impala-shell -q "create database if not exists ${PRODUCTION}"
-#impala-shell -d ${PRODUCTION} -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}./" | sed "s/$/;/" | impala-shell -c -f -
-#impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f -
-
-echo "Updating ${PRODUCTION} historical snapshots database"
-impala-shell -i impala-cluster-dn1.openaire.eu -q "create database if not exists ${PRODUCTION}"
-impala-shell -i impala-cluster-dn1.openaire.eu -d ${PRODUCTION} -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}./" | sed "s/$/;/" | impala-shell -i impala-cluster-dn1.openaire.eu -c -f -
-impala-shell -i impala-cluster-dn1.openaire.eu -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -i impala-cluster-dn1.openaire.eu -c -f -
-echo "Production monitor db ready!"
-
-impala-shell -i impala-cluster-dn1.openaire.eu -q "drop view ${MONITOR_PROD}.historical_snapshots"
-impala-shell -i impala-cluster-dn1.openaire.eu -q "drop view ${MONITOR_PROD}.historical_snapshots_fos"
-
-impala-shell -i impala-cluster-dn1.openaire.eu -q "create view ${MONITOR_PROD}.historical_snapshots as select * from ${SOURCE}.historical_snapshots"
-impala-shell -i impala-cluster-dn1.openaire.eu -q "create view ${MONITOR_PROD}.historical_snapshots_fos as select * from ${SOURCE}.historical_snapshots_fos"
-
-impala-shell -i impala-cluster-dn1.openaire.eu -q "drop view ${MONITOR_IRISH_PROD}.historical_snapshots_irish"
-impala-shell -i impala-cluster-dn1.openaire.eu -q "drop view ${MONITOR_IRISH_PROD}.historical_snapshots_irish_fos"
-
-
-impala-shell -i impala-cluster-dn1.openaire.eu -q "create view ${MONITOR_IRISH_PROD}.historical_snapshots_irish as select * from ${SOURCE}.historical_snapshots_irish"
-impala-shell -i impala-cluster-dn1.openaire.eu -q "create view ${MONITOR_IRISH_PROD}.historical_snapshots_irish_fos as select * from ${SOURCE}.historical_snapshots_irish"
--- a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/hist_snaps.sh
+++ b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/hist_snaps.sh
@ -1,27 +0,0 @@
-export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
-export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
-if ! [ -L $link_folder ]
-then
-    rm -Rf "$link_folder"
-    ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
-fi
-
-export SOURCE=$1
-export TARGET=$2
-export SHADOW=$3
-export SCRIPT_PATH=$4
-
-
-export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228"
-export HADOOP_USER_NAME="oozie"
-
-echo "Getting file from " $4
-hdfs dfs -copyToLocal $4
-
-#update Monitor DB IRISH
-#cat CreateDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2/g1" | sed "s/GRAPHDB/$3/g1" > foo
-cat buildIrishMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2/g1" > foo
-hive $HIVE_OPTS -f foo
-
-echo "Hive shell finished"
-
--- a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/scripts/BuildHistSnapsAll.sql
+++ b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/scripts/BuildHistSnapsAll.sql
@ -1,82 +0,0 @@
--- FOS snapshots: start from every row of the previous run's database.
-INSERT INTO ${hist_db_name}.historical_snapshots_fos_tmp
-SELECT * FROM ${hist_db_name_prev}.historical_snapshots_fos;
-
--- Append the ${hist_date} snapshot: number of distinct results for each combination of the grouped attributes.
-INSERT INTO ${hist_db_name}.historical_snapshots_fos_tmp
-select
-    cast(${hist_date} as STRING),
-    count(distinct r.id),
-    r.type,
-    rf.lvl1,
-    rf.lvl2,
-    pf.publicly_funded,
-    r.access_mode,
-    r.gold,
-    r.green,
-    coalesce(gl.green_with_license,0),
-    h.is_hybrid,
-    b.is_bronze_oa,
-    d.in_diamond_journal,
-    t.is_transformative,
-    pr.refereed
-from ${stats_db_name}.result r
-         left outer join ${stats_db_name}.result_fos rf on rf.id=r.id
-         left outer join ${stats_db_name}.indi_pub_publicly_funded pf on pf.id=r.id
-         left outer join ${stats_db_name}.indi_pub_green_with_license gl on gl.id=r.id
-         left outer join ${stats_db_name}.indi_pub_bronze_oa b on b.id=r.id
-         left outer join ${stats_db_name}.indi_pub_diamond d on d.id=r.id
-         left outer join ${stats_db_name}.indi_pub_in_transformative t on t.id=r.id
-         left outer join ${stats_db_name}.indi_pub_hybrid h on h.id=r.id
-         left outer join ${stats_db_name}.result_refereed pr on pr.id=r.id
-group by r.green, r.gold, r.access_mode, r.type, rf.lvl1,rf.lvl2, pf.publicly_funded, gl.green_with_license,b.is_bronze_oa,d.in_diamond_journal,t.is_transformative,h.is_hybrid,pr.refereed;
-
--- Materialise the staging table as parquet, in the hist db and in the monitor db, then discard the staging table.
-drop table if exists ${hist_db_name}.historical_snapshots_fos purge;
-
-CREATE TABLE ${hist_db_name}.historical_snapshots_fos STORED AS PARQUET AS
-SELECT * FROM ${hist_db_name}.historical_snapshots_fos_tmp;
-
-drop table if exists ${monitor_db_name}.historical_snapshots_fos purge;
-
-create table ${monitor_db_name}.historical_snapshots_fos stored as parquet
-as select * from ${hist_db_name}.historical_snapshots_fos;
-
-drop table ${hist_db_name}.historical_snapshots_fos_tmp purge;
-
--- Snapshots without the FOS levels: same procedure.
-INSERT INTO ${hist_db_name}.historical_snapshots_tmp
-SELECT * FROM ${hist_db_name_prev}.historical_snapshots;
-
-INSERT INTO ${hist_db_name}.historical_snapshots_tmp
-select
-    cast(${hist_date} as STRING),
-    count(distinct r.id),
-    r.type,
-    pf.publicly_funded,
-    r.access_mode,
-    r.gold,
-    r.green,
-    coalesce(gl.green_with_license,0),
-    h.is_hybrid,
-    b.is_bronze_oa,
-    d.in_diamond_journal,
-    t.is_transformative,
-    pr.refereed
-from ${stats_db_name}.result r
-         left outer join ${stats_db_name}.indi_pub_publicly_funded pf on pf.id=r.id
-         left outer join ${stats_db_name}.indi_pub_green_with_license gl on gl.id=r.id
-         left outer join ${stats_db_name}.indi_pub_bronze_oa b on b.id=r.id
-         left outer join ${stats_db_name}.indi_pub_diamond d on d.id=r.id
-         left outer join ${stats_db_name}.indi_pub_in_transformative t on t.id=r.id
-         left outer join ${stats_db_name}.indi_pub_hybrid h on h.id=r.id
-         left outer join ${stats_db_name}.result_refereed pr on pr.id=r.id
-group by r.green, r.gold, r.access_mode, r.type, pf.publicly_funded, gl.green_with_license,b.is_bronze_oa,d.in_diamond_journal,t.is_transformative,h.is_hybrid,pr.refereed;
-
-drop table if exists ${hist_db_name}.historical_snapshots purge;
-
-CREATE TABLE ${hist_db_name}.historical_snapshots STORED AS PARQUET AS
-SELECT * FROM ${hist_db_name}.historical_snapshots_tmp;
-
-drop table if exists ${monitor_db_name}.historical_snapshots purge;
-
-create table ${monitor_db_name}.historical_snapshots stored as parquet
-as select * from ${hist_db_name}.historical_snapshots;
-
-drop table ${hist_db_name}.historical_snapshots_tmp purge;
--- a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/scripts/BuildHistSnapsIrish.sql
+++ b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/scripts/BuildHistSnapsIrish.sql
@ -1,91 +0,0 @@
--- Irish FOS snapshots: start from every row of the previous run's database.
-INSERT INTO ${hist_db_name}.historical_snapshots_fos_irish_tmp
-SELECT * FROM ${hist_db_name_prev}.historical_snapshots_irish_fos;
-
--- Append the ${hist_date} snapshot: number of distinct results for each combination of the grouped attributes.
-INSERT INTO ${hist_db_name}.historical_snapshots_fos_irish_tmp
-select
-    cast(${hist_date} as STRING),
-    count(distinct r.id),
-    r.type,
-    rf.lvl1,
-    rf.lvl2,
-    pf.publicly_funded,
-    r.access_mode,
-    r.gold,
-    r.green,
-    coalesce(gl.green_with_license,0),
-    h.is_hybrid,
-    b.is_bronze_oa,
-    d.in_diamond_journal,
-    t.is_transformative,
-    pr.refereed
-from ${stats_irish_db_name}.result r
-         left outer join ${stats_irish_db_name}.result_fos rf on rf.id=r.id
-         left outer join ${stats_irish_db_name}.indi_pub_publicly_funded pf on pf.id=r.id
-         left outer join ${stats_irish_db_name}.indi_pub_green_with_license gl on gl.id=r.id
-         left outer join ${stats_irish_db_name}.indi_pub_bronze_oa b on b.id=r.id
-         left outer join ${stats_irish_db_name}.indi_pub_diamond d on d.id=r.id
-         left outer join ${stats_irish_db_name}.indi_pub_in_transformative t on t.id=r.id
-         left outer join ${stats_irish_db_name}.indi_pub_hybrid h on h.id=r.id
-         left outer join ${stats_irish_db_name}.result_refereed pr on pr.id=r.id
-group by r.green, r.gold, r.access_mode, r.type, rf.lvl1,rf.lvl2, pf.publicly_funded, gl.green_with_license,b.is_bronze_oa,d.in_diamond_journal,t.is_transformative,h.is_hybrid,pr.refereed;
-
--- Materialise the staging table as parquet, in the hist db and in the Irish monitor db, then discard the staging table.
-drop table if exists ${hist_db_name}.historical_snapshots_irish_fos purge;
-
-CREATE TABLE ${hist_db_name}.historical_snapshots_irish_fos STORED AS PARQUET AS
-SELECT * FROM ${hist_db_name}.historical_snapshots_fos_irish_tmp;
-
-drop table if exists ${monitor_irish_db_name}.historical_snapshots_irish_fos purge;
-
-create table ${monitor_irish_db_name}.historical_snapshots_irish_fos stored as parquet
-as select * from ${hist_db_name}.historical_snapshots_irish_fos;
-
-drop table ${hist_db_name}.historical_snapshots_fos_irish_tmp purge;
-
--- Irish snapshots without the FOS levels: same procedure.
-INSERT INTO ${hist_db_name}.historical_snapshots_irish_tmp
-SELECT * FROM ${hist_db_name_prev}.historical_snapshots_irish;
-
-INSERT INTO ${hist_db_name}.historical_snapshots_irish_tmp
-select
-    cast(${hist_date} as STRING),
-    count(distinct r.id),
-    r.type,
-    pf.publicly_funded,
-    r.access_mode,
-    r.gold,
-    r.green,
-    coalesce(gl.green_with_license,0),
-    h.is_hybrid,
-    b.is_bronze_oa,
-    d.in_diamond_journal,
-    t.is_transformative,
-    pr.refereed
-from ${stats_irish_db_name}.result r
-         left outer join ${stats_irish_db_name}.indi_pub_publicly_funded pf on pf.id=r.id
-         left outer join ${stats_irish_db_name}.indi_pub_green_with_license gl on gl.id=r.id
-         left outer join ${stats_irish_db_name}.indi_pub_bronze_oa b on b.id=r.id
-         left outer join ${stats_irish_db_name}.indi_pub_diamond d on d.id=r.id
-         left outer join ${stats_irish_db_name}.indi_pub_in_transformative t on t.id=r.id
-         left outer join ${stats_irish_db_name}.indi_pub_hybrid h on h.id=r.id
-         left outer join ${stats_irish_db_name}.result_refereed pr on pr.id=r.id
-group by r.green, r.gold, r.access_mode, r.type, pf.publicly_funded, gl.green_with_license,b.is_bronze_oa,d.in_diamond_journal,t.is_transformative,h.is_hybrid,pr.refereed;
-
-
-drop table if exists ${hist_db_name}.historical_snapshots_irish purge;
-
-CREATE TABLE ${hist_db_name}.historical_snapshots_irish STORED AS PARQUET AS
-SELECT * FROM ${hist_db_name}.historical_snapshots_irish_tmp;
-
-drop table if exists ${monitor_irish_db_name}.historical_snapshots_irish purge;
-
-create table ${monitor_irish_db_name}.historical_snapshots_irish stored as parquet
-as select * from ${hist_db_name}.historical_snapshots_irish;
-
-drop table ${hist_db_name}.historical_snapshots_irish_tmp purge;
--- a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/scripts/CreateDB.sql
+++ b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/scripts/CreateDB.sql
@ -1,92 +0,0 @@
--------------------------------------------------------------
--------------------------------------------------------------
-- Historical Snapshots database creation
--------------------------------------------------------------
--------------------------------------------------------------
-
--- Re-create the historical snapshots database from scratch.
-DROP DATABASE IF EXISTS ${hist_db_name} CASCADE;
-CREATE DATABASE ${hist_db_name};
-
--- Staging table for the FOS snapshots: transactional ORC, bucketed on hist_date.
-DROP TABLE IF EXISTS ${hist_db_name}.historical_snapshots_fos_tmp PURGE;
-
-CREATE TABLE ${hist_db_name}.historical_snapshots_fos_tmp
-(
-    hist_date          STRING,
-    total              INT,
-    type               STRING,
-    lvl1               STRING,
-    lvl2               STRING,
-    publicly_funded    INT,
-    accessrights       STRING,
-    gold               INT,
-    green              INT,
-    green_with_license INT,
-    hybrid             INT,
-    bronze             INT,
-    diamond            INT,
-    transformative     INT,
-    peer_reviewed      STRING
-)
-CLUSTERED BY (hist_date) INTO 100 BUCKETS
-STORED AS ORC
-TBLPROPERTIES ('transactional' = 'true');
-
--- Staging table for the Irish FOS snapshots: same columns and storage.
-DROP TABLE IF EXISTS ${hist_db_name}.historical_snapshots_fos_irish_tmp PURGE;
-
-CREATE TABLE ${hist_db_name}.historical_snapshots_fos_irish_tmp
-(
-    hist_date          STRING,
-    total              INT,
-    type               STRING,
-    lvl1               STRING,
-    lvl2               STRING,
-    publicly_funded    INT,
-    accessrights       STRING,
-    gold               INT,
-    green              INT,
-    green_with_license INT,
-    hybrid             INT,
-    bronze             INT,
-    diamond            INT,
-    transformative     INT,
-    peer_reviewed      STRING
-)
-CLUSTERED BY (hist_date) INTO 100 BUCKETS
-STORED AS ORC
-TBLPROPERTIES ('transactional' = 'true');
-
--- Staging table for the snapshots without the lvl1/lvl2 columns.
-DROP TABLE IF EXISTS ${hist_db_name}.historical_snapshots_tmp PURGE;
-
-CREATE TABLE ${hist_db_name}.historical_snapshots_tmp
-(
-    hist_date          STRING,
-    total              INT,
-    type               STRING,
-    publicly_funded    INT,
-    accessrights       STRING,
-    gold               INT,
-    green              INT,
-    green_with_license INT,
-    hybrid             INT,
-    bronze             INT,
-    diamond            INT,
-    transformative     INT,
-    peer_reviewed      STRING
-)
-CLUSTERED BY (hist_date) INTO 100 BUCKETS
-STORED AS ORC
-TBLPROPERTIES ('transactional' = 'true');
-
--- Staging table for the Irish snapshots without the lvl1/lvl2 columns.
-DROP TABLE IF EXISTS ${hist_db_name}.historical_snapshots_irish_tmp PURGE;
-
-CREATE TABLE ${hist_db_name}.historical_snapshots_irish_tmp
-(
-    hist_date          STRING,
-    total              INT,
-    type               STRING,
-    publicly_funded    INT,
-    accessrights       STRING,
-    gold               INT,
-    green              INT,
-    green_with_license INT,
-    hybrid             INT,
-    bronze             INT,
-    diamond            INT,
-    transformative     INT,
-    peer_reviewed      STRING
-)
-CLUSTERED BY (hist_date) INTO 100 BUCKETS
-STORED AS ORC
-TBLPROPERTIES ('transactional' = 'true');
--- a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/workflow.xml
@ -1,159 +0,0 @@
-<workflow-app name="Stats Hist Snapshots" xmlns="uri:oozie:workflow:0.5">
-    <!-- Every property referenced as ${...} by the actions of this workflow is declared here. -->
-    <parameters>
-        <property>
-            <name>hist_db_name</name>
-            <description>the target hist database name</description>
-        </property>
-        <property>
-            <name>hist_db_name_prev</name>
-            <description>the hist database name of previous_month</description>
-        </property>
-
-        <property>
-            <name>stats_db_name</name>
-            <description>the stats db name</description>
-        </property>
-        <property>
-            <name>stats_irish_db_name</name>
-            <description>the stats irish db name</description>
-        </property>
-        <property>
-            <name>monitor_db_name</name>
-            <description>the monitor db name</description>
-        </property>
-        <property>
-            <name>monitor_irish_db_name</name>
-            <description>the irish monitor db name</description>
-        </property>
-        <property>
-            <name>hist_db_prod_name</name>
-            <description>the production db</description>
-        </property>
-        <property>
-            <name>hist_db_shadow_name</name>
-            <description>the production shadow db</description>
-        </property>
-        <property>
-            <name>monitor_db_prod_name</name>
-            <description>the production monitor db, passed as 4th argument to finalizeImpalaCluster.sh</description>
-        </property>
-        <property>
-            <name>monitor_irish_db_prod_name</name>
-            <description>the production irish monitor db, passed as 5th argument to finalizeImpalaCluster.sh</description>
-        </property>
-        <property>
-            <name>hist_date</name>
-            <description>the snaps date</description>
-        </property>
-        <property>
-            <name>hive_metastore_uris</name>
-            <description>hive server metastore URIs</description>
-        </property>
-        <property>
-            <name>hive_jdbc_url</name>
-            <description>hive server jdbc url</description>
-        </property>
-        <property>
-            <name>hive_timeout</name>
-            <description>the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a heartbeat. The default value is 300 seconds.</description>
-        </property>
-        <property>
-            <name>hadoop_user_name</name>
-            <description>user name of the wf owner</description>
-        </property>
-    </parameters>
-
-    <global>
-        <job-tracker>${jobTracker}</job-tracker>
-        <name-node>${nameNode}</name-node>
-        <configuration>
-            <property>
-                <name>hive.metastore.uris</name>
-                <value>${hive_metastore_uris}</value>
-            </property>
-            <property>
-            	<name>hive.txn.timeout</name>
-            	<value>${hive_timeout}</value>
-            </property>
-	<property>
-	    <name>mapred.job.queue.name</name>
-	    <value>analytics</value>
-	</property>
-        </configuration>
-    </global>
-
-    <start to="resume_from"/>
-    <decision name="resume_from">
-        <switch>
-            <case to="CreateDB">${wf:conf('resumeFrom') eq 'CreateDB'}</case>
-            <case to="BuildHistSnaps">${wf:conf('resumeFrom') eq 'BuildHistSnaps'}</case>
-            <case to="BuildHistSnapsIrish">${wf:conf('resumeFrom') eq 'BuildHistSnapsIrish'}</case>
-            <case to="Step2-copyDataToImpalaCluster">${wf:conf('resumeFrom') eq 'Step2-copyDataToImpalaCluster'}</case>
-            <case to="Step3-finalizeImpalaCluster">${wf:conf('resumeFrom') eq 'Step3-finalizeImpalaCluster'}</case>
-            <default to="BuildHistSnaps"/>
-        </switch>
-    </decision>
-
-    <kill name="Kill">
-        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-    </kill>
-
-    <action name="CreateDB">
-        <hive2 xmlns="uri:oozie:hive2-action:0.1">
-            <jdbc-url>${hive_jdbc_url}</jdbc-url>
-            <script>scripts/CreateDB.sql</script>
-            <param>hist_db_name=${hist_db_name}</param>
-        </hive2>
-        <ok to="BuildHistSnaps"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="BuildHistSnaps">
-        <hive2 xmlns="uri:oozie:hive2-action:0.1">
-            <jdbc-url>${hive_jdbc_url}</jdbc-url>
-            <script>scripts/BuildHistSnapsAll.sql</script>
-            <param>hist_db_name=${hist_db_name}</param>
-            <param>hist_db_name_prev=${hist_db_name_prev}</param>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>monitor_db_name=${monitor_db_name}</param>
-            <param>hist_date=${hist_date}</param>
-        </hive2>
-        <ok to="BuildHistSnapsIrish"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="BuildHistSnapsIrish">
-        <hive2 xmlns="uri:oozie:hive2-action:0.1">
-            <jdbc-url>${hive_jdbc_url}</jdbc-url>
-            <script>scripts/BuildHistSnapsIrish.sql</script>
-            <param>hist_db_name=${hist_db_name}</param>
-            <param>hist_db_name_prev=${hist_db_name_prev}</param>
-            <param>stats_irish_db_name=${stats_irish_db_name}</param>
-            <param>monitor_irish_db_name=${monitor_irish_db_name}</param>
-            <param>hist_date=${hist_date}</param>
-        </hive2>
-        <ok to="Step2-copyDataToImpalaCluster"/>
-        <error to="Kill"/>
-    </action>
-    <action name="Step2-copyDataToImpalaCluster">
-        <shell xmlns="uri:oozie:shell-action:0.1">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
-            <exec>copyDataToImpalaCluster.sh</exec>
-            <argument>${hist_db_name}</argument>
-            <argument>${hadoop_user_name}</argument>
-            <file>copyDataToImpalaCluster.sh</file>
-        </shell>
-        <ok to="Step3-finalizeImpalaCluster"/>
-        <error to="Kill"/>
-    </action>
-    <action name="Step3-finalizeImpalaCluster">
-        <shell xmlns="uri:oozie:shell-action:0.1">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
-            <exec>finalizeImpalaCluster.sh</exec>
-            <argument>${hist_db_name}</argument>
-            <argument>${hist_db_prod_name}</argument>
-            <argument>${hist_db_shadow_name}</argument>
-            <argument>${monitor_db_prod_name}</argument>
-            <argument>${monitor_irish_db_prod_name}</argument>
-            <file>finalizeImpalaCluster.sh</file>
-        </shell>
-        <ok to="End"/>
-        <error to="Kill"/>
-    </action>
-
-    <end name="End"/>
-</workflow-app>
--- a/dhp-workflows/dhp-stats-monitor-irish/pom.xml
+++ b/dhp-workflows/dhp-stats-monitor-irish/pom.xml
@ -1,32 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <parent>
-        <artifactId>dhp-workflows</artifactId>
-        <groupId>eu.dnetlib.dhp</groupId>
-        <version>1.2.5-SNAPSHOT</version>
-    </parent>
-    <modelVersion>4.0.0</modelVersion>
-    <artifactId>dhp-stats-monitor-irish</artifactId>
-    <dependencies>
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-core_2.11</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-sql_2.11</artifactId>
-        </dependency>
-    </dependencies>
-	<build>
-		<plugins>
-			<plugin>
-				<groupId>pl.project13.maven</groupId>
-				<artifactId>git-commit-id-plugin</artifactId>
-                <version>2.1.11</version>
-				<configuration>
-					<failOnNoGitDirectory>false</failOnNoGitDirectory>
-				</configuration>
-			</plugin>
-		</plugins>
-	</build>
-</project>
--- a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/config-default.xml
@ -1,30 +0,0 @@
-<configuration>
-    <property>
-        <name>jobTracker</name>
-        <value>${jobTracker}</value>
-    </property>
-    <property>
-        <name>nameNode</name>
-        <value>${nameNode}</value>
-    </property>
-    <property>
-        <name>oozie.use.system.libpath</name>
-        <value>true</value>
-    </property>
-    <property>
-        <name>oozie.action.sharelib.for.spark</name>
-        <value>spark2</value>
-    </property>
-    <property>
-        <name>hive_metastore_uris</name>
-        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
-    </property>
-    <property>
-        <name>hive_jdbc_url</name>
-        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=22166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=15596411699;spark.yarn.driver.memoryOverhead=1228</value>
-    </property>
-	<property>
-		<name>oozie.wf.workflow.notification.url</name>
-		<value>{serviceUrl}/v1/oozieNotification/jobUpdate?jobId=$jobId%26status=$status</value>
-	</property>
-</configuration>
--- a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh
@ -1,222 +0,0 @@
-export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
-export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
-if ! [ -L $link_folder ]
-then
-    rm -Rf "$link_folder"
-    ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
-fi
-
-export HADOOP_USER_NAME=$2
-
-# Set the active HDFS node of OCEAN and IMPALA cluster.
-OCEAN_HDFS_NODE='hdfs://nameservice1'
-echo -e "\nOCEAN HDFS virtual-name which resolves automatically to the active-node: ${OCEAN_HDFS_NODE}"
-
-IMPALA_HDFS_NODE=''
-COUNTER=0
-while [ $COUNTER -lt 3 ]; do
-  if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu/tmp >/dev/null 2>&1; then
-      IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020'
-      break
-  elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu/tmp >/dev/null 2>&1; then
-      IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020'
-      break
-  else
-      IMPALA_HDFS_NODE=''
-      sleep 1
-  fi
-  ((COUNTER++))
-done
-if [ -z "$IMPALA_HDFS_NODE" ]; then
-    echo -e "\n\nERROR: PROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER! | AFTER ${COUNTER} RETRIES.\n\n"
-    exit 1
-fi
-echo -e "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE} , after ${COUNTER} retries.\n\n"
-
-IMPALA_HOSTNAME='impala-cluster-dn1.openaire.eu'
-IMPALA_CONFIG_FILE='/etc/impala_cluster/hdfs-site.xml'
-
-IMPALA_HDFS_DB_BASE_PATH="${IMPALA_HDFS_NODE}/user/hive/warehouse"
-
-
-# Set sed arguments.
-LOCATION_HDFS_NODE_SED_ARG="s|${OCEAN_HDFS_NODE}|${IMPALA_HDFS_NODE}|g" # This requires to be used with "sed -e" in order to have the "|" delimiter (as the "/" conflicts with the URIs)
-
-# Set the SED command arguments for column-names with reserved words:
-DATE_SED_ARG_1='s/[[:space:]]\date[[:space:]]/\`date\`/g'
-DATE_SED_ARG_2='s/\.date,/\.\`date\`,/g'  # the "date" may be part of a larger field name like "datestamp" or "date_aggregated", so we need to be careful with what we are replacing.
-DATE_SED_ARG_3='s/\.date[[:space:]]/\.\`date\` /g'
-
-HASH_SED_ARG_1='s/[[:space:]]\hash[[:space:]]/\`hash\`/g'
-HASH_SED_ARG_2='s/\.hash,/\.\`hash\`,/g'
-HASH_SED_ARG_3='s/\.hash[[:space:]]/\.\`hash\` /g'
-
-LOCATION_SED_ARG_1='s/[[:space:]]\location[[:space:]]/\`location\`/g'
-LOCATION_SED_ARG_2='s/\.location,/\.\`location\`,/g'
-LOCATION_SED_ARG_3='s/\.location[[:space:]]/\.\`location\` /g'
-
-
-# Copies the Hive database named by $1 from the Ocean cluster to the Impala cluster:
-# drops the old copy on Impala, transfers the files with "hadoop distcp", re-creates the tables from their
-# parquet files, re-creates the views (retrying those that depend on other views) and computes table stats.
-# Returns: 0 on success, 1 if the old database could not be dropped, 2 if distcp failed,
-#          3 if a retry round did not reduce the number of failing views, 4 if the entity lists differ at the end.
-function copydb() {
-  db=$1
-  echo -e "\nStart processing db: '${db}'..\n"
-
-  # Delete the old DB from Impala cluster (if exists).
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
-  log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
-  if [ -n "$log_errors" ]; then
-    echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN DROPPING THE OLD DATABASE! EXITING...\n\n"
-    rm -f error.log
-    return 1
-  fi
-
-  # Make Impala aware of the deletion of the old DB immediately.
-  sleep 1
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-
-  echo -e "\n\nCopying files of '${db}', from Ocean to Impala cluster..\n"
-  # Using max-bandwidth of: 50 * 100 Mb/s = 5 Gb/s
-  # Using max memory of: 50 * 6144 = 300 Gb
-  # Using 1MB as a buffer-size.
-  # The " -Ddistcp.dynamic.recordsPerChunk=50" arg is not available in our version of hadoop
-  # The "ug" args cannot be used as we get a "User does not belong to hive" error.
-  # The "p" argument cannot be used, as it blocks the files from being used, giving a "sticky bit"-error, even after applying chmod and chown onm the files.
-  hadoop distcp -Dmapreduce.map.memory.mb=6144 -m 70 -bandwidth 150 \
-                -numListstatusThreads 40 \
-                -copybuffersize 1048576 \
-                -strategy dynamic \
-                -pb \
-                ${OCEAN_HDFS_NODE}/user/hive/warehouse/${db}.db ${IMPALA_HDFS_DB_BASE_PATH}
-  # Save the exit status right away: "$?" is overwritten by every following command, including the "[" test below.
-  distcp_status=$?
-
-  # Check the exit status of the "hadoop distcp" command.
-  if [ $distcp_status -eq 0 ]; then
-    echo -e "\nSuccessfully copied the files of '${db}'.\n"
-  else
-    echo -e "\n\nERROR: FAILED TO TRANSFER THE FILES OF '${db}', WITH 'hadoop distcp'. GOT WITH EXIT STATUS: ${distcp_status}\n\n"
-    rm -f error.log
-    return 2
-  fi
-
-  # In case we ever use this script for a writable DB (using inserts/updates), we should perform the following costly operation as well..
-  #hdfs dfs -conf ${IMPALA_CONFIG_FILE} -chmod -R 777 ${TEMP_SUBDIR_FULLPATH}/${db}.db
-
-  echo -e "\nCreating schema for db: '${db}'\n"
-
-  # create the new database (with the same name)
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create database ${db}"
-
-  # Make Impala aware of the creation of the new DB immediately.
-  sleep 1
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-  sleep 1
-  # Because "Hive" and "Impala" do not have compatible schemas, we cannot use the "show create table <name>" output from hive to create the exact same table in impala.
-  # So, we have to find at least one parquet file (check if it's there) from the table in the ocean cluster for impala to use it to extract the table-schema itself from that file.
-
-  all_create_view_statements=()
-
-  entities_on_ocean=`hive -e "show tables in ${db};" | sed 's/WARN:.*//g'`  # Get the tables and views without any potential the "WARN" logs.
-  for i in ${entities_on_ocean[@]}; do # Use un-quoted values, as the elemetns are single-words.
-    # Check if this is a view by showing the create-statement where it should print "create view" for a view, not the "create table". Unfortunately, there is no "show views" command.
-    create_entity_statement=`hive -e "show create table ${db}.${i};"` # It needs to happen in two stages, otherwise the "grep" is not able to match multi-line statement.
-
-    create_view_statement_test=`echo -e "$create_entity_statement" | grep 'CREATE VIEW'`
-    if [ -n "$create_view_statement_test" ]; then
-      echo -e "\n'${i}' is a view, so we will save its 'create view' statement and execute it on Impala, after all tables have been created.\n"
-      create_view_statement=`echo -e "$create_entity_statement" | sed 's/WARN:.*//g' | sed 's/\`//g' \
-        | sed 's/"$/;/' | sed 's/^"//' | sed 's/\\"\\"/\"/g' | sed -e "${LOCATION_HDFS_NODE_SED_ARG}" | sed "${DATE_SED_ARG_1}" | sed "${HASH_SED_ARG_1}" | sed "${LOCATION_SED_ARG_1}" \
-        | sed "${DATE_SED_ARG_2}" | sed "${HASH_SED_ARG_2}" | sed "${LOCATION_SED_ARG_2}" \
-        | sed "${DATE_SED_ARG_3}" | sed "${HASH_SED_ARG_3}" | sed "${LOCATION_SED_ARG_3}"`
-      all_create_view_statements+=("$create_view_statement")
-    else
-      echo -e "\n'${i}' is a table, so we will check for its parquet files and create the table on Impala cluster.\n"
-      CURRENT_PRQ_FILE=`hdfs dfs -conf ${IMPALA_CONFIG_FILE} -ls -C "${IMPALA_HDFS_DB_BASE_PATH}/${db}.db/${i}/" | grep -v 'Found' | grep -v '_impala_insert_staging' |  head -1`
-      if [ -z "$CURRENT_PRQ_FILE" ]; then # If there is not parquet-file inside.
-          echo -e "\nERROR: THE TABLE \"${i}\" HAD NO FILES TO GET THE SCHEMA FROM! IT'S EMPTY!\n\n"
-      else
-        impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
-        log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
-        if [ -n "$log_errors" ]; then
-          echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN CREATING TABLE '${i}'!\n\n"
-        fi
-      fi
-    fi
-  done
-
-  echo -e "\nAll tables have been created, going to create the views..\n"
-
-  # Time to loop through the views and create them.
-  # At this point all table-schemas should have been created.
-
-  # "${#array[@]}" is the number of elements; without "[@]" bash returns the string-length of the first element.
-  previous_num_of_views_to_retry=${#all_create_view_statements[@]}
-  if [[ $previous_num_of_views_to_retry -gt 0 ]]; then
-    echo -e "\nAll_create_view_statements:\n\n${all_create_view_statements[@]}\n"  # DEBUG
-    # Make Impala aware of the new tables, so it knows them when creating the views.
-    sleep 1
-    impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-    sleep 1
-  else
-    echo -e "\nDB '${db}' does not contain any views.\n"
-  fi
-
-  level_counter=0
-  while [[ ${#all_create_view_statements[@]} -gt 0 ]]; do
-    ((level_counter++))
-    # The only accepted reason for a view to not be created, is if it depends on another view, which has not been created yet.
-    # In this case, we should retry creating this particular view again.
-    should_retry_create_view_statements=()
-
-    for create_view_statement in "${all_create_view_statements[@]}"; do # Here we use double quotes, as the elements are phrases, instead of single-words.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
-      specific_errors=`cat error.log | grep -E "FAILED: ParseException line 1:13 missing TABLE at 'view'|ERROR: AnalysisException: Could not resolve table reference:"`
-      if [ -n "$specific_errors" ]; then
-        echo -e "\nspecific_errors: ${specific_errors}\n"
-        echo -e "\nView '$(cat error.log | grep "CREATE VIEW " | sed 's/CREATE VIEW //g' | sed 's/ as select .*//g')' failed to be created, possibly because it depends on another view.\n"
-        should_retry_create_view_statements+=("$create_view_statement")
-      else
-          sleep 1 # Wait a bit for Impala to register that the view was created, before possibly referencing it by another view.
-      fi
-    done
-
-    new_num_of_views_to_retry=${#should_retry_create_view_statements[@]}
-    if [[ $new_num_of_views_to_retry -eq $previous_num_of_views_to_retry ]]; then
-      echo -e "\n\nERROR: THE NUMBER OF VIEWS TO RETRY HAS NOT BEEN REDUCED! THE SCRIPT IS LIKELY GOING TO AN INFINITE-LOOP! EXITING..\n\n"
-      rm -f error.log
-      return 3
-    elif [[ $new_num_of_views_to_retry -gt 0 ]]; then
-      echo -e "\nTo be retried \"create_view_statements\":\n\n${should_retry_create_view_statements[@]}\n"
-      previous_num_of_views_to_retry=$new_num_of_views_to_retry
-    else
-      echo -e "\nFinished creating views, for db: '${db}', in level-${level_counter}.\n"
-    fi
-    # Continue with the views that still have to be created; an empty list ends the loop.
-    all_create_view_statements=("${should_retry_create_view_statements[@]}") # This is needed in any case to either move forward with the rest of the views or stop at 0 remaining views.
-  done
-
-  sleep 1
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-  sleep 1
-
-  echo -e "\nComputing stats for tables..\n"
-  entities_on_impala=`impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`
-  for i in ${entities_on_impala[@]}; do # Use un-quoted values, as the elemetns are single-words.
-    # Taking the create table statement from the Ocean cluster, just to check if its a view, as the output is easier than using impala-shell from Impala cluster.
-    create_view_statement=`hive -e "show create table ${db}.${i};" | grep "CREATE VIEW"`  # This grep works here, as we do not want to match multiple-lines.
-    if [ -z "$create_view_statement" ]; then  # If it's a table, then go load the data to it.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}";
-    fi
-  done
-
-  # Both variables hold the raw command output as a single string, so this is a plain string comparison of the two listings.
-  if [ "${entities_on_impala[@]}" == "${entities_on_ocean[@]}" ]; then
-    echo -e "\nAll entities have been copied to Impala cluster.\n"
-  else
-    echo -e "\n\nERROR: 1 OR MORE ENTITIES OF DB '${db}' FAILED TO BE COPIED TO IMPALA CLUSTER!\n\n"
-    rm -f error.log
-    return 4
-  fi
-
-  rm -f error.log
-  echo -e "\n\nFinished processing db: ${db}\n\n"
-}
-
-
-MONITOR_DB=$1
-#HADOOP_USER_NAME=$2
-copydb $MONITOR_DB
-
--- a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/finalizeImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/finalizeImpalaCluster.sh
@ -1,23 +0,0 @@
-# Publishes a monitor DB on the Impala cluster: drops what "show tables" lists in PRODUCTION ($2)
-# as views, then creates in PRODUCTION one "select *" view per entry listed by "show tables" in SOURCE ($1).
-
-# Per-user Python egg cache exposed through a symlink under /tmp
-# (presumably needed by impala-shell, going by the link name - confirm).
-export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
-export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
-if ! [ -L "$link_folder" ]
-then
-    rm -Rf "$link_folder"
-    ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
-fi
-
-SOURCE=$1
-PRODUCTION=$2
-echo "${SOURCE}"
-echo "${PRODUCTION}"
-
-# Both names are spliced into the SQL text and the sed programs below;
-# an empty one would generate statements like "drop view if exists .<name>".
-if [ -z "${SOURCE}" ] || [ -z "${PRODUCTION}" ]; then
-    echo "ERROR: usage: $0 <source_db> <production_db>" >&2
-    exit 1
-fi
-
-# Single definition of the Impala daemon every impala-shell call below connects to.
-IMPALA_HOSTNAME='impala-cluster-dn1.openaire.eu'
-
-#echo "Updating ${PRODUCTION} monitor database old cluster"
-#impala-shell -q "create database if not exists ${PRODUCTION}"
-#impala-shell -d ${PRODUCTION} -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}./" | sed "s/$/;/" | impala-shell -c -f -
-#impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f -
-
-echo "Updating ${PRODUCTION} monitor database"
-impala-shell -i "${IMPALA_HOSTNAME}" -q "create database if not exists ${PRODUCTION}"
-# Turn each name listed in PRODUCTION into a "drop view if exists" statement and feed the batch back to impala-shell via stdin.
-impala-shell -i "${IMPALA_HOSTNAME}" -d "${PRODUCTION}" -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}./" | sed "s/$/;/" | impala-shell -i "${IMPALA_HOSTNAME}" -c -f -
-# Turn each name listed in SOURCE into a "create view PRODUCTION.<name> as select * from SOURCE.<name>" statement.
-impala-shell -i "${IMPALA_HOSTNAME}" -d "${SOURCE}" -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -i "${IMPALA_HOSTNAME}" -c -f -
-echo "Production monitor db ready!"
--- a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/monitor_irish.sh
+++ b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/monitor_irish.sh
@ -1,28 +0,0 @@
-# Builds the Irish monitor DB with Hive: downloads the SQL template from HDFS, replaces the
-# SOURCE / TARGET / GRAPHDB placeholders and runs the generated script with the hive CLI.
-# Arguments: $1 source stats db, $2 target monitor db, $3 shadow db (exported, not used in this script),
-#            $4 HDFS path of buildIrishMonitorDB.sql, $5 graph db.
-# Exits non-zero as soon as the download, the substitution or hive fails.
-
-# Per-user Python egg cache exposed through a symlink under /tmp.
-export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
-export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
-if ! [ -L "$link_folder" ]
-then
-    rm -Rf "$link_folder"
-    ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
-fi
-
-export SOURCE=$1
-export TARGET=$2
-export SHADOW=$3
-export SCRIPT_PATH=$4
-export GRAPHDB=$5
-
-
-export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228"
-export HADOOP_USER_NAME="oozie"
-
-echo "Getting file from " "$4"
-# Without the template there is nothing to run: fail here instead of handing hive an empty script.
-hdfs dfs -copyToLocal "$4" || exit $?
-
-#update Monitor DB IRISH
-#cat CreateDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2/g1" | sed "s/GRAPHDB/$3/g1" > foo
-# sed reads the template directly, so a missing file is reported as a failure; the three
-# substitutions are applied in the same order as before.
-sed -e "s/SOURCE/$1/g" -e "s/TARGET/$2/g1" -e "s/GRAPHDB/$5/g1" buildIrishMonitorDB.sql > foo || exit $?
-# HIVE_OPTS is left unquoted on purpose: it must be split into separate -hiveconf arguments.
-# The exit status is propagated so that the caller sees a failed build.
-hive $HIVE_OPTS -f foo || exit $?
-
-echo "Hive shell finished"
-
--- a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/scripts/buildIrishMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/scripts/buildIrishMonitorDB.sql
@ -1,241 +0,0 @@
-drop database if exists TARGET cascade;
-create database if not exists TARGET;
-
-create view if not exists TARGET.category as select * from SOURCE.category;
-create view if not exists TARGET.concept as select * from SOURCE.concept;
-create view if not exists TARGET.context as select * from SOURCE.context;
-create view if not exists TARGET.country as select * from SOURCE.country;
-create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp;
-create view if not exists TARGET.creation_date as select * from SOURCE.creation_date;
--create view if not exists TARGET.funder as select * from SOURCE.funder;
-create view if not exists TARGET.fundref as select * from SOURCE.fundref;
-create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture;
-create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure;
-create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents;
-create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers;
-create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft;
-create view if not exists TARGET.hrrst as select * from SOURCE.hrrst;
-
-drop table if exists TARGET.irish_funders;
-
-create TEMPORARY table TARGET.irish_funders as
-select distinct xpath_string(fundingtree[0].value, '//funder/name') as funder from GRAPHDB.project
-                     where xpath_string(fundingtree[0].value, '//funder/jurisdiction')='IE';
--create TEMPORARY table TARGET.irish_funders as
--select distinct name as funder from SOURCE.fundref where country='IE';
-
-drop table if exists TARGET.result;
-
--- Results kept in the monitor DB: those linked to a project whose funder is listed in
--- TARGET.irish_funders, those linked to an organization with country 'IE', and those having
--- a PID equal to a DOI in stats_ext.transformative_facts. Duplicates across branches are removed.
-create table TARGET.result stored as parquet as
-select distinct * from (
-       select r.*
-       from SOURCE.result r
-                join SOURCE.result_projects rp on rp.id=r.id
-                join SOURCE.project p on p.id=rp.project
-                join TARGET.irish_funders irf on irf.funder=p.funder
-       union all
-       select r.*
-       from SOURCE.result r
-                join SOURCE.result_organization ro on ro.id=r.id
-                join SOURCE.organization o on o.id=ro.organization and o.country='IE'
-       union all
-       select r.*
-       from SOURCE.result r
-                join SOURCE.result_pids pid on pid.id=r.id
-                join stats_ext.transformative_facts tf on tf.doi=pid.pid
-   ) foo;
-
-create view if not exists TARGET.category as select * from SOURCE.category;
-create view if not exists TARGET.concept as select * from SOURCE.concept;
-create view if not exists TARGET.context as select * from SOURCE.context;
-create view if not exists TARGET.country as select * from SOURCE.country;
-create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp;
-create view if not exists TARGET.creation_date as select * from SOURCE.creation_date;
-
-create table TARGET.funder stored as parquet as select * from SOURCE.funder where country='IE';
-
-create view if not exists TARGET.fundref as select * from SOURCE.fundref;
-create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture;
-create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure;
-create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents;
-create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers;
-create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft;
-create view if not exists TARGET.hrrst as select * from SOURCE.hrrst;
--create view if not exists TARGET.graduatedoctorates as select * from SOURCE.graduatedoctorates;
-
-create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_references_oc stored as parquet as select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_citations_oc stored as parquet as select * from SOURCE.result_citations_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_classifications stored as parquet as select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_concepts stored as parquet as select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_datasources stored as parquet as select * from SOURCE.result_datasources orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_fundercount stored as parquet as select * from SOURCE.result_fundercount orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_gold stored as parquet as select * from SOURCE.result_gold orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_greenoa stored as parquet as select * from SOURCE.result_greenoa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_languages stored as parquet as select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.licenses_normalized STORED AS PARQUET as select * from SOURCE.licenses_normalized;
-
-create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_organization stored as parquet as select * from SOURCE.result_organization orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_peerreviewed stored as parquet as select * from SOURCE.result_peerreviewed orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_pids stored as parquet as select * from SOURCE.result_pids orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_projectcount stored as parquet as select * from SOURCE.result_projectcount orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_projects stored as parquet as select * from SOURCE.result_projects orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_refereed stored as parquet as select * from SOURCE.result_refereed orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_sources stored as parquet as select * from SOURCE.result_sources orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_fos stored as parquet as select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_accessroute stored as parquet as select * from SOURCE.result_accessroute orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_instance stored as parquet as select * from SOURCE.result_instance orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_orcid stored as parquet as select * from SOURCE.result_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
-create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
-create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;
-drop view TARGET.foo1;
-drop view TARGET.foo2;
-
-- datasources
-create view if not exists TARGET.datasource as select * from SOURCE.datasource;
-create view if not exists TARGET.datasource_oids as select * from SOURCE.datasource_oids;
-create view if not exists TARGET.datasource_organizations as select * from SOURCE.datasource_organizations;
-create view if not exists TARGET.datasource_sources as select * from SOURCE.datasource_sources;
-
-create table TARGET.datasource_results stored as parquet as select id as result, datasource as id from TARGET.result_datasources;
-
-- organizations
-create view if not exists TARGET.organization as select * from SOURCE.organization;
-create view if not exists TARGET.organization_datasources as select * from SOURCE.organization_datasources;
-create view if not exists TARGET.organization_pids as select * from SOURCE.organization_pids;
-create view if not exists TARGET.organization_projects as select * from SOURCE.organization_projects;
-create view if not exists TARGET.organization_sources as select * from SOURCE.organization_sources;
-
-- projects
-create view if not exists TARGET.project as select * from SOURCE.project;
-create view if not exists TARGET.project_oids as select * from SOURCE.project_oids;
-create view if not exists TARGET.project_organizations as select * from SOURCE.project_organizations;
-create view if not exists TARGET.project_resultcount as select * from SOURCE.project_resultcount;
-create view if not exists TARGET.project_classification as select * from SOURCE.project_classification;
-create view if not exists TARGET.project_organization_contribution as select * from SOURCE.project_organization_contribution;
-
-create table TARGET.project_results stored as parquet as select id as result, project as id from TARGET.result_projects;
-
-
-- indicators
-- Sprint 1 ----
-create table TARGET.indi_pub_green_oa stored as parquet as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_pub_grey_lit stored as parquet as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-- Sprint 2 ----
-create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
---- Sprint 3 ----
-create table TARGET.indi_funded_result_with_fundref stored as parquet as select * from SOURCE.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create view TARGET.indi_result_org_collab as select * from SOURCE.indi_result_org_collab;
-create view TARGET.indi_result_org_country_collab as select * from SOURCE.indi_result_org_country_collab;
-create view TARGET.indi_project_collab_org as select * from SOURCE.indi_project_collab_org;
-create view TARGET.indi_project_collab_org_country as select * from SOURCE.indi_project_collab_org_country;
-create view TARGET.indi_funder_country_collab as select * from SOURCE.indi_funder_country_collab;
-create view TARGET.indi_result_country_collab as select * from SOURCE.indi_result_country_collab;
---- Sprint 4 ----
-create table TARGET.indi_pub_diamond stored as parquet as select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_pub_in_transformative stored as parquet as select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_pub_closed_other_open stored as parquet as select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
---- Sprint 5 ----
-create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
---- Sprint 6 ----
-create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_pub_bronze_oa stored as parquet as select * from SOURCE.indi_pub_bronze_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
-
-create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
-
-create table TARGET.indi_pub_downloads_year stored as parquet as select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
-
-create table TARGET.indi_pub_downloads_datasource_year stored as parquet as select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
-
---- Sprint 7 ----
-create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_pub_hybrid stored as parquet as select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create view TARGET.indi_org_fairness as select * from SOURCE.indi_org_fairness;
-create view TARGET.indi_org_fairness_pub_pr as select * from SOURCE.indi_org_fairness_pub_pr;
-create view TARGET.indi_org_fairness_pub_year as select * from SOURCE.indi_org_fairness_pub_year;
-create view TARGET.indi_org_fairness_pub as select * from SOURCE.indi_org_fairness_pub;
-create view TARGET.indi_org_fairness_year as select * from SOURCE.indi_org_fairness_year;
-create view TARGET.indi_org_findable_year as select * from SOURCE.indi_org_findable_year;
-create view TARGET.indi_org_findable as select * from SOURCE.indi_org_findable;
-create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess;
-create view TARGET.indi_org_openess_year as select * from SOURCE.indi_org_openess_year;
-create table TARGET.indi_pub_has_preprint stored as parquet as select * from SOURCE.indi_pub_has_preprint orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_pub_in_subscribed stored as parquet as select * from SOURCE.indi_pub_in_subscribed orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_result_with_pid stored as parquet as select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_impact_measures stored as parquet as select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * from SOURCE.indi_pub_interdisciplinarity orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.result_apc_affiliations stored as parquet as select * from SOURCE.result_apc_affiliations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_is_project_result_after stored as parquet as select * from SOURCE.indi_is_project_result_after orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
-create view TARGET.indi_is_funder_plan_s as select * from SOURCE.indi_is_funder_plan_s;
-create view TARGET.indi_funder_fairness as select * from SOURCE.indi_funder_fairness;
-create view TARGET.indi_funder_openess as select * from SOURCE.indi_funder_openess;
-create view TARGET.indi_funder_findable as select * from SOURCE.indi_funder_findable;
-create view TARGET.indi_ris_fairness as select * from SOURCE.indi_ris_fairness;
-create view TARGET.indi_ris_openess as select * from SOURCE.indi_ris_openess;
-create view TARGET.indi_ris_findable as select * from SOURCE.indi_ris_findable;
-
-create table TARGET.indi_pub_green_with_license stored as parquet as select * from SOURCE.indi_pub_green_with_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-create table TARGET.result_country stored as parquet as select * from SOURCE.result_country orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-create table TARGET.indi_pub_publicly_funded stored as parquet as select * from SOURCE.indi_pub_publicly_funded orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_result_oa_with_license stored as parquet as select * from SOURCE.indi_result_oa_with_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-create table TARGET.indi_result_oa_without_license stored as parquet as select * from SOURCE.indi_result_oa_without_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_result_under_transformative stored as parquet as select * from SOURCE.indi_result_under_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--- a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/workflow.xml
@ -1,118 +0,0 @@
-<workflow-app name="Irish Monitor Update" xmlns="uri:oozie:workflow:0.5">
-    <parameters>
-        <property>
-            <name>stats_db_name</name>
-            <description>the target stats database name</description>
-        </property>
-        <property>
-            <name>graph_db_name</name>
-            <description>the graph database name</description>
-        </property>
-        <property>
-            <name>monitor_irish_db_name</name>
-            <description>the target monitor db name</description>
-        </property>
-        <property>
-            <name>monitor_irish_db_prod_name</name>
-            <description>the name of the production monitor db</description>
-        </property>
-        <property>
-            <name>monitor_irish_db_shadow_name</name>
-            <description>the name of the shadow monitor db</description>
-        </property>
-        <property>
-            <name>hive_metastore_uris</name>
-            <description>hive server metastore URIs</description>
-        </property>
-        <property>
-            <name>hive_jdbc_url</name>
-            <description>hive server jdbc url</description>
-        </property>
-        <property>
-            <name>hive_timeout</name>
-            <description>the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a heartbeat. The default value is 300 seconds.</description>
-        </property>
-        <property>
-            <name>hadoop_user_name</name>
-            <description>user name of the wf owner</description>
-        </property>
-    </parameters>
-
-    <global>
-        <job-tracker>${jobTracker}</job-tracker>
-        <name-node>${nameNode}</name-node>
-        <configuration>
-            <property>
-                <name>hive.metastore.uris</name>
-                <value>${hive_metastore_uris}</value>
-            </property>
-            <property>
-            	<name>hive.txn.timeout</name>
-            	<value>${hive_timeout}</value>
-            </property>
-	<property>
-	    <name>mapred.job.queue.name</name>
-	    <value>analytics</value>
-	</property>
-        </configuration>
-    </global>
-
-    <start to="resume_from"/>
-    <decision name="resume_from">
-        <switch>
-            <case to="Step1-buildIrishMonitorDB">${wf:conf('resumeFrom') eq 'Step1-buildIrishMonitorDB'}</case>
-            <case to="Step2-copyDataToImpalaCluster">${wf:conf('resumeFrom') eq 'Step2-copyDataToImpalaCluster'}</case>
-            <case to="Step3-finalizeImpalaCluster">${wf:conf('resumeFrom') eq 'Step3-finalizeImpalaCluster'}</case>
-            <default to="Step1-buildIrishMonitorDB"/>
-        </switch>
-    </decision>
-
-    <kill name="Kill">
-        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-    </kill>
-
-    <action name="Step1-buildIrishMonitorDB">
-        <shell xmlns="uri:oozie:shell-action:0.1">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
-            <exec>monitor_irish.sh</exec>
-            <argument>${stats_db_name}</argument>
-            <argument>${monitor_irish_db_name}</argument>
-            <argument>${monitor_irish_db_shadow_name}</argument>
-            <argument>${wf:appPath()}/scripts/buildIrishMonitorDB.sql</argument>
-            <argument>${graph_db_name}</argument>
-            <file>monitor_irish.sh</file>
-        </shell>
-        <ok to="Step2-copyDataToImpalaCluster"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="Step2-copyDataToImpalaCluster">
-        <shell xmlns="uri:oozie:shell-action:0.1">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
-            <exec>copyDataToImpalaCluster.sh</exec>
-            <argument>${monitor_irish_db_name}</argument>
-            <argument>${hadoop_user_name}</argument>
-            <file>copyDataToImpalaCluster.sh</file>
-        </shell>
-        <ok to="Step3-finalizeImpalaCluster"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="Step3-finalizeImpalaCluster">
-        <shell xmlns="uri:oozie:shell-action:0.1">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
-            <exec>finalizeImpalaCluster.sh</exec>
-            <argument>${monitor_irish_db_name}</argument>
-            <argument>${monitor_irish_db_prod_name}</argument>
-            <argument>${monitor_irish_db_shadow_name}</argument>
-            <file>finalizeImpalaCluster.sh</file>
-        </shell>
-        <ok to="End"/>
-        <error to="Kill"/>
-    </action>
-
-    <end name="End"/>
-</workflow-app>
--- a/dhp-workflows/dhp-stats-monitor-update/pom.xml
+++ b/dhp-workflows/dhp-stats-monitor-update/pom.xml
@ -1,32 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <parent>
-        <artifactId>dhp-workflows</artifactId>
-        <groupId>eu.dnetlib.dhp</groupId>
-        <version>1.2.5-SNAPSHOT</version>
-    </parent>
-    <modelVersion>4.0.0</modelVersion>
-    <artifactId>dhp-stats-monitor-update</artifactId>
-    <dependencies>
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-core_2.11</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-sql_2.11</artifactId>
-        </dependency>
-    </dependencies>
-	<build>
-		<plugins>
-			<plugin>
-				<groupId>pl.project13.maven</groupId>
-				<artifactId>git-commit-id-plugin</artifactId>
-                <version>2.1.11</version>
-				<configuration>
-					<failOnNoGitDirectory>false</failOnNoGitDirectory>
-				</configuration>
-			</plugin>
-		</plugins>
-	</build>
-</project>
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/config-default.xml
@ -1,30 +0,0 @@
-<configuration>
-    <property>
-        <name>jobTracker</name>
-        <value>${jobTracker}</value>
-    </property>
-    <property>
-        <name>nameNode</name>
-        <value>${nameNode}</value>
-    </property>
-    <property>
-        <name>oozie.use.system.libpath</name>
-        <value>true</value>
-    </property>
-    <property>
-        <name>oozie.action.sharelib.for.spark</name>
-        <value>spark2</value>
-    </property>
-    <property>
-        <name>hive_metastore_uris</name>
-        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
-    </property>
-    <property>
-        <name>hive_jdbc_url</name>
-        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=22166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=15596411699;spark.yarn.driver.memoryOverhead=1228</value>
-    </property>
-	<property>
-		<name>oozie.wf.workflow.notification.url</name>
-		<value>{serviceUrl}/v1/oozieNotification/jobUpdate?jobId=$jobId%26status=$status</value>
-	</property>
-</configuration>
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh
@ -1,223 +0,0 @@
-# Preamble of the copy script: prepares the impala-shell environment, selects the HDFS
-# endpoints of the two clusters and defines the sed expressions used later by copydb().
-#
-# Positional arguments read in this file:
-#   $1  base name of the monitor DB (read at the bottom of the script)
-#   $2  user name exported as HADOOP_USER_NAME
-
-# If ${link_folder} is not already a symlink, remove whatever is there and (re)create it
-# as a symlink pointing to ${PYTHON_EGG_CACHE}${link_folder}.
-export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
-export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
-if ! [ -L $link_folder ]
-then
-    rm -Rf "$link_folder"
-    ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
-fi
-
-export HADOOP_USER_NAME=$2
-
-# Set the active HDFS node of OCEAN and IMPALA cluster.
-OCEAN_HDFS_NODE='hdfs://nameservice1'
-echo -e "\nOCEAN HDFS virtual-name which resolves automatically to the active-node: ${OCEAN_HDFS_NODE}"
-
-# Probe "/tmp" on the two Impala name-node candidates (mn1 first, then mn2) and keep the
-# first one that answers. At most 3 rounds are attempted, with a 1-second pause after a
-# round in which neither candidate answered.
-IMPALA_HDFS_NODE=''
-COUNTER=0
-while [ $COUNTER -lt 3 ]; do
-  if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu/tmp >/dev/null 2>&1; then
-      IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020'
-      break
-  elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu/tmp >/dev/null 2>&1; then
-      IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020'
-      break
-  else
-      IMPALA_HDFS_NODE=''
-      sleep 1
-  fi
-  ((COUNTER++))
-done
-# Abort the whole script when no candidate could be reached.
-if [ -z "$IMPALA_HDFS_NODE" ]; then
-    echo -e "\n\nERROR: PROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER! | AFTER ${COUNTER} RETRIES.\n\n"
-    exit 1
-fi
-echo -e "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE} , after ${COUNTER} retries.\n\n"
-
-IMPALA_HOSTNAME='impala-cluster-dn1.openaire.eu'
-IMPALA_CONFIG_FILE='/etc/impala_cluster/hdfs-site.xml'
-
-IMPALA_HDFS_DB_BASE_PATH="${IMPALA_HDFS_NODE}/user/hive/warehouse"
-
-
-# Set sed arguments.
-LOCATION_HDFS_NODE_SED_ARG="s|${OCEAN_HDFS_NODE}|${IMPALA_HDFS_NODE}|g" # Must be used with "sed -e"; "|" is the delimiter because "/" conflicts with the URIs.
-
-# Set the SED command arguments for column-names with reserved words (date, hash, location).
-# Each word gets three expressions: surrounded by whitespace, ".word," and ".word" followed by whitespace.
-# NOTE(review): the *_ARG_1 replacements do not re-insert the whitespace consumed by the match,
-# and the backslash placed directly before the keyword in the pattern is unusual; confirm both are intended.
-DATE_SED_ARG_1='s/[[:space:]]\date[[:space:]]/\`date\`/g'
-DATE_SED_ARG_2='s/\.date,/\.\`date\`,/g'  # "date" may be part of a larger field name like "datestamp" or "date_aggregated", so the match is anchored on the trailing comma.
-DATE_SED_ARG_3='s/\.date[[:space:]]/\.\`date\` /g'
-
-HASH_SED_ARG_1='s/[[:space:]]\hash[[:space:]]/\`hash\`/g'
-HASH_SED_ARG_2='s/\.hash,/\.\`hash\`,/g'
-HASH_SED_ARG_3='s/\.hash[[:space:]]/\.\`hash\` /g'
-
-LOCATION_SED_ARG_1='s/[[:space:]]\location[[:space:]]/\`location\`/g'
-LOCATION_SED_ARG_2='s/\.location,/\.\`location\`,/g'
-LOCATION_SED_ARG_3='s/\.location[[:space:]]/\.\`location\` /g'
-
-
-
-# Copies one Hive database from the Ocean cluster to the Impala cluster.
-#
-# Steps: drop the old DB on Impala, distcp the "<db>.db" warehouse directory, re-create the
-# DB and its tables on Impala (schema taken from one parquet file per table), create the
-# views (retrying those that depend on views not created yet), compute table stats and
-# finally compare the entity lists of the two clusters.
-#
-# Arguments:
-#   $1  name of the database to copy
-# Reads the globals HADOOP_USER_NAME, IMPALA_HOSTNAME, IMPALA_CONFIG_FILE, OCEAN_HDFS_NODE,
-# IMPALA_HDFS_DB_BASE_PATH and the *_SED_ARG* expressions. Uses "error.log" in the current
-# directory as scratch file.
-# Returns:
-#   1  dropping the old database reported WARN/ERROR/FAILED
-#   2  "hadoop distcp" failed
-#   3  a view-creation pass did not reduce the number of views still to be created
-#   4  the entity lists of the two clusters differ at the end
-function copydb() {
-  db=$1
-  echo -e "\nStart processing db: '${db}'..\n"
-
-  # Delete the old DB from Impala cluster (if exists).
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so we need to capture them and put them in a file, in order to perform "grep" on them later
-  log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
-  if [ -n "$log_errors" ]; then
-    echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN DROPPING THE OLD DATABASE! EXITING...\n\n"
-    rm -f error.log
-    return 1
-  fi
-
-  # Make Impala aware of the deletion of the old DB immediately.
-  sleep 1
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-
-  echo -e "\n\nCopying files of '${db}', from Ocean to Impala cluster..\n"
-  # distcp is run with 70 maps (-m), 6144 MB of memory per map, a per-map bandwidth limit of 150 (-bandwidth)
-  # and a 1MB copy buffer.
-  # The " -Ddistcp.dynamic.recordsPerChunk=50" arg is not available in our version of hadoop
-  # The "ug" args cannot be used as we get a "User does not belong to hive" error.
-  # The "p" argument cannot be used, as it blocks the files from being used, giving a "sticky bit"-error, even after applying chmod and chown on the files.
-  hadoop distcp -Dmapreduce.map.memory.mb=6144 -m 70 -bandwidth 150 \
-                -numListstatusThreads 40 \
-                -copybuffersize 1048576 \
-                -strategy dynamic \
-                -pb \
-                ${OCEAN_HDFS_NODE}/user/hive/warehouse/${db}.db ${IMPALA_HDFS_DB_BASE_PATH}
-  # Capture the status right away: any later command (including the "[ ... ]" test) overwrites "$?".
-  distcp_exit_status=$?
-
-  # Check the exit status of the "hadoop distcp" command.
-  if [ $distcp_exit_status -eq 0 ]; then
-    echo -e "\nSuccessfully copied the files of '${db}'.\n"
-  else
-    echo -e "\n\nERROR: FAILED TO TRANSFER THE FILES OF '${db}', WITH 'hadoop distcp'. GOT WITH EXIT STATUS: ${distcp_exit_status}\n\n"
-    rm -f error.log
-    return 2
-  fi
-
-  # In case we ever use this script for a writable DB (using inserts/updates), we should perform the following costly operation as well..
-  #hdfs dfs -conf ${IMPALA_CONFIG_FILE} -chmod -R 777 ${TEMP_SUBDIR_FULLPATH}/${db}.db
-
-  echo -e "\nCreating schema for db: '${db}'\n"
-
-  # create the new database (with the same name)
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create database ${db}"
-
-  # Make Impala aware of the creation of the new DB immediately.
-  sleep 1
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-  sleep 1
-  # Because "Hive" and "Impala" do not have compatible schemas, we cannot use the "show create table <name>" output from hive to create the exact same table in impala.
-  # So, we have to find at least one parquet file (check if it's there) from the table in the ocean cluster for impala to use it to extract the table-schema itself from that file.
-
-  all_create_view_statements=()
-
-  entities_on_ocean=`hive -e "show tables in ${db};" | sed 's/WARN:.*//g'`  # Get the tables and views without any potential "WARN" logs.
-  for i in ${entities_on_ocean[@]}; do # Use un-quoted values, as the elements are single-words.
-    # Check if this is a view by showing the create-statement where it should print "create view" for a view, not the "create table". Unfortunately, there is no "show views" command.
-    create_entity_statement=`hive -e "show create table ${db}.${i};"` # It needs to happen in two stages, otherwise the "grep" is not able to match multi-line statement.
-
-    create_view_statement_test=`echo -e "$create_entity_statement" | grep 'CREATE VIEW'`
-    if [ -n "$create_view_statement_test" ]; then
-      echo -e "\n'${i}' is a view, so we will save its 'create view' statement and execute it on Impala, after all tables have been created.\n"
-      create_view_statement=`echo -e "$create_entity_statement" | sed 's/WARN:.*//g' | sed 's/\`//g' \
-        | sed 's/"$/;/' | sed 's/^"//' | sed 's/\\"\\"/\"/g' | sed -e "${LOCATION_HDFS_NODE_SED_ARG}" | sed "${DATE_SED_ARG_1}" | sed "${HASH_SED_ARG_1}" | sed "${LOCATION_SED_ARG_1}" \
-        | sed "${DATE_SED_ARG_2}" | sed "${HASH_SED_ARG_2}" | sed "${LOCATION_SED_ARG_2}" \
-        | sed "${DATE_SED_ARG_3}" | sed "${HASH_SED_ARG_3}" | sed "${LOCATION_SED_ARG_3}"`
-      all_create_view_statements+=("$create_view_statement")
-    else
-      echo -e "\n'${i}' is a table, so we will check for its parquet files and create the table on Impala cluster.\n"
-      CURRENT_PRQ_FILE=`hdfs dfs -conf ${IMPALA_CONFIG_FILE} -ls -C "${IMPALA_HDFS_DB_BASE_PATH}/${db}.db/${i}/" | grep -v 'Found' | grep -v '_impala_insert_staging' |  head -1`
-      if [ -z "$CURRENT_PRQ_FILE" ]; then # If there is no parquet-file inside.
-          echo -e "\nERROR: THE TABLE \"${i}\" HAD NO FILES TO GET THE SCHEMA FROM! IT'S EMPTY!\n\n"
-      else
-        impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
-        log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
-        if [ -n "$log_errors" ]; then
-          echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN CREATING TABLE '${i}'!\n\n"
-        fi
-      fi
-    fi
-  done
-
-  echo -e "\nAll tables have been created, going to create the views..\n"
-
-  # Time to loop through the views and create them.
-  # At this point all table-schemas should have been created.
-
-  # "${#array[@]}" is the number of elements; "${#array}" would be the string length of the first element.
-  previous_num_of_views_to_retry=${#all_create_view_statements[@]}
-  if [[ $previous_num_of_views_to_retry -gt 0 ]]; then
-    echo -e "\nAll_create_view_statements:\n\n${all_create_view_statements[@]}\n"  # DEBUG
-    # Make Impala aware of the new tables, so it knows them when creating the views.
-    sleep 1
-    impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-    sleep 1
-  else
-    echo -e "\nDB '${db}' does not contain any views.\n"
-  fi
-
-  level_counter=0
-  while [[ ${#all_create_view_statements[@]} -gt 0 ]]; do
-    ((level_counter++))
-    # The only accepted reason for a view to not be created, is if it depends on another view, which has not been created yet.
-    # In this case, we should retry creating this particular view again.
-    should_retry_create_view_statements=()
-
-    for create_view_statement in "${all_create_view_statements[@]}"; do # Here we use double quotes, as the elements are phrases, instead of single-words.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so we need to capture them and put them in a file, in order to perform "grep" on them later
-      specific_errors=`cat error.log | grep -E "FAILED: ParseException line 1:13 missing TABLE at 'view'|ERROR: AnalysisException: Could not resolve table reference:"`
-      if [ -n "$specific_errors" ]; then
-        echo -e "\nspecific_errors: ${specific_errors}\n"
-        echo -e "\nView '$(cat error.log | grep "CREATE VIEW " | sed 's/CREATE VIEW //g' | sed 's/ as select .*//g')' failed to be created, possibly because it depends on another view.\n"
-        should_retry_create_view_statements+=("$create_view_statement")
-      else
-          sleep 1 # Wait a bit for Impala to register that the view was created, before possibly referencing it by another view.
-      fi
-    done
-
-    new_num_of_views_to_retry=${#should_retry_create_view_statements[@]}
-    if [[ $new_num_of_views_to_retry -eq $previous_num_of_views_to_retry ]]; then
-      # No view was created in this pass, so another pass cannot make progress either.
-      echo -e "\n\nERROR: THE NUMBER OF VIEWS TO RETRY HAS NOT BEEN REDUCED! THE SCRIPT IS LIKELY GOING TO AN INFINITE-LOOP! EXITING..\n\n"
-      rm -f error.log
-      return 3
-    elif [[ $new_num_of_views_to_retry -gt 0 ]]; then
-      echo -e "\nTo be retried \"create_view_statements\":\n\n${should_retry_create_view_statements[@]}\n"
-      previous_num_of_views_to_retry=$new_num_of_views_to_retry
-    else
-      echo -e "\nFinished creating views, for db: '${db}', in level-${level_counter}.\n"
-    fi
-    all_create_view_statements=("${should_retry_create_view_statements[@]}") # This is needed in any case to either move forward with the rest of the views or stop at 0 remaining views.
-  done
-
-  sleep 1
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-  sleep 1
-
-  echo -e "\nComputing stats for tables..\n"
-  entities_on_impala=`impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`
-  for i in ${entities_on_impala[@]}; do # Use un-quoted values, as the elements are single-words.
-    # Taking the create table statement from the Ocean cluster, just to check if its a view, as the output is easier than using impala-shell from Impala cluster.
-    create_view_statement=`hive -e "show create table ${db}.${i};" | grep "CREATE VIEW"`  # This grep works here, as we do not want to match multiple-lines.
-    if [ -z "$create_view_statement" ]; then  # If it's a table, then compute its stats.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}";
-    fi
-  done
-
-  # Both variables are plain strings (command output), so this is an exact text comparison of the two listings.
-  if [ "${entities_on_impala[@]}" == "${entities_on_ocean[@]}" ]; then
-    echo -e "\nAll entities have been copied to Impala cluster.\n"
-  else
-    echo -e "\n\nERROR: 1 OR MORE ENTITIES OF DB '${db}' FAILED TO BE COPIED TO IMPALA CLUSTER!\n\n"
-    rm -f error.log
-    return 4
-  fi
-
-  rm -f error.log
-  echo -e "\n\nFinished processing db: ${db}\n\n"
-}
-
-
-MONITOR_DB=$1
-
-copydb $MONITOR_DB'_institutions'
-copydb $MONITOR_DB
-
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/finalizeImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/finalizeImpalaCluster.sh
@ -1,57 +0,0 @@
-# Publishes the freshly built monitor databases on the Impala cluster: every entity of the
-# source DB ($1) is exposed as a "select *" view in the production DB ($2), and the same is
-# done for the "<name>_institutions" pair.
-
-# If ${link_folder} is not already a symlink, replace it with a symlink pointing to
-# ${PYTHON_EGG_CACHE}${link_folder}.
-export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
-export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
-if [ ! -L $link_folder ]; then
-    rm -Rf "$link_folder"
-    ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
-fi
-#
-#function createShadowDB() {
-#  SOURCE=$1
-#  SHADOW=$2
-#
-#  # drop views from db
-#  for i in `impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} --delimited  -q "show tables"`;
-#    do
-#        `impala-shell  -i impala-cluster-dn1.openaire.eu -d ${SHADOW} -q "drop view $i;"`;
-#    done
-#
-#  impala-shell -i impala-cluster-dn1.openaire.eu -q "drop database ${SHADOW} CASCADE";
-#  impala-shell -i impala-cluster-dn1.openaire.eu -q "create database if not exists ${SHADOW}";
-##  impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} -q "show tables" | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -i impala-cluster-dn1.openaire.eu -f -
-#  impala-shell -i impala-cluster-dn1.openaire.eu -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -i impala-cluster-dn1.openaire.eu -f -
-#}
-#
-#MONITOR_DB=$1
-#MONITOR_DB_SHADOW=$2
-#
-#createShadowDB $MONITOR_DB'_institutions' $MONITOR_DB'_institutions_shadow'
-#createShadowDB $MONITOR_DB $MONITOR_DB'_shadow'
-
-SOURCE=$1
-PRODUCTION=$2
-echo ${SOURCE}
-echo ${PRODUCTION}
-
-#echo "Updating ${PRODUCTION} monitor database old cluster"
-#impala-shell -q "create database if not exists ${PRODUCTION}"
-#impala-shell -d ${PRODUCTION} -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}./" | sed "s/$/;/" | impala-shell -c -f -
-#impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f -
-#
-#echo "Updating ${PRODUCTION}_institutions database old cluster"
-#impala-shell -q "create database if not exists ${PRODUCTION}_institutions"
-#impala-shell -d ${PRODUCTION}_institutions -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}_institutions./" | sed "s/$/;/" | impala-shell -c -f -
-#impala-shell -d ${SOURCE}_institutions -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}_institutions.\1 as select * from ${SOURCE}_institutions.\1;/" | impala-shell -c -f -
-#echo "Production insitutions db ready!"
-
-# Rebuilds the views of a production DB from a source DB.
-#   $1  source database
-#   $2  production database (created if missing)
-# First generates and runs one "drop view if exists" per entity currently listed in the
-# production DB, then one "create view ... as select *" per entity listed in the source DB.
-function refresh_production_views() {
-  local source_db=$1
-  local production_db=$2
-
-  impala-shell -i impala-cluster-dn1.openaire.eu -q "create database if not exists ${production_db}"
-
-  impala-shell -i impala-cluster-dn1.openaire.eu -d ${production_db} -q "show tables" --delimited \
-    | sed "s/^/drop view if exists ${production_db}./" \
-    | sed "s/$/;/" \
-    | impala-shell -i impala-cluster-dn1.openaire.eu -c -f -
-
-  impala-shell -i impala-cluster-dn1.openaire.eu -d ${source_db} -q "show tables" --delimited \
-    | sed "s/\(.*\)/create view ${production_db}.\1 as select * from ${source_db}.\1;/" \
-    | impala-shell -i impala-cluster-dn1.openaire.eu -c -f -
-}
-
-echo "Updating ${PRODUCTION} monitor database"
-refresh_production_views "${SOURCE}" "${PRODUCTION}"
-echo "Production monitor db ready!"
-
-echo "Updating ${PRODUCTION}_institutions database"
-refresh_production_views "${SOURCE}_institutions" "${PRODUCTION}_institutions"
-echo "Production insitutions db ready!"
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/monitor.sh
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/monitor.sh
@ -1,60 +0,0 @@
-export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
-export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
-if ! [ -L $link_folder ]
-then
-    rm -Rf "$link_folder"
-    ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
-fi
-
-export SOURCE=$1
-export TARGET=$2
-export SHADOW=$3
-export SCRIPT_PATH=$4
-export SCRIPT_PATH2=$5
-export SCRIPT_PATH2=$6
-
-export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228"
-export HADOOP_USER_NAME="oozie"
-
-echo "Getting file from " $4
-hdfs dfs -copyToLocal $4
-
-echo "Getting file from " $5
-hdfs dfs -copyToLocal $5
-
-echo "Getting file from " $6
-hdfs dfs -copyToLocal $6
-
-#update Monitor DB
-cat updateMonitorDBAll.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2/g1" > foo
-hive $HIVE_OPTS -f foo
-
-#update Institutions DB
-cat updateMonitorDB_institutions.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_institutions/g1" > foo
-hive $HIVE_OPTS -f foo
-cat updateMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_institutions/g1" > foo
-hive $HIVE_OPTS -f foo
-
-
-
-echo "Hive shell finished"
-
-#echo "Updating shadow monitor insitutions database"
-#hive -e "drop database if exists ${SHADOW}_institutions cascade"
-#hive -e "create database if not exists ${SHADOW}_institutions"
-#hive $HIVE_OPTS --database ${2}_institutions -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_institutions.\1 as select * from ${2}_institutions.\1;/" > foo
-#hive -f foo
-#echo "Shadow db monitor insitutions ready!"
-#
-##update Monitor DB
-#cat updateMonitorDBAll.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2/g1" > foo
-#hive $HIVE_OPTS -f foo
-#
-#echo "Hive shell finished"
-#
-#echo "Updating shadow monitor database"
-#hive -e "drop database if exists ${SHADOW} cascade"
-#hive -e "create database if not exists ${SHADOW}"
-#hive $HIVE_OPTS --database ${2} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${2}.\1;/" > foo
-#hive -f foo
-#echo "Shadow db monitor insitutions ready!"
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB.sql
@ -1,278 +0,0 @@
--drop database if exists TARGET cascade;
--create database if not exists TARGET;
--
--create view if not exists TARGET.category as select * from SOURCE.category;
--create view if not exists TARGET.concept as select * from SOURCE.concept;
--create view if not exists TARGET.context as select * from SOURCE.context;
--create view if not exists TARGET.country as select * from SOURCE.country;
--create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp;
--create view if not exists TARGET.creation_date as select * from SOURCE.creation_date;
--create view if not exists TARGET.funder as select * from SOURCE.funder;
--create view if not exists TARGET.fundref as select * from SOURCE.fundref;
--create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture;
--create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure;
--create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents;
--create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers;
--create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft;
--create view if not exists TARGET.hrrst as select * from SOURCE.hrrst;
--
--create table TARGET.result stored as parquet as
--    select distinct * from (
--        select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id)
--        union all
--        select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id)
--        union all
--        select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in (
--             'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC"
--             'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council
--             'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ??
--             'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University
--             'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade
--             'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki
--             'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho
--             'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid
--             'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen
--             'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens
--             -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot
--             'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University
--             'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark
--             'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin
--             'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt
--             'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven
--             'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape
--             'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
--             'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
--             'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
--             'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
--             'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
--             'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw
--             'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
--             'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
--             'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
--             'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras
--             'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
--             'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
--             'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech
--             'openorgs____::e15adb13c4dadd49de4d35c39b5da93a',  -- Nanyang Technological University
--             'openorgs____::4b34103bde246228fcd837f5f1bf4212',  -- Autonomous University of Barcelona
--             'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb',	-- McMaster University
--             'openorgs____::51c7fc556e46381734a25a6fbc3fd398',	-- University of Modena and Reggio Emilia
--             'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db',	-- Bilkent University
--             'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06',	-- Saints Cyril and Methodius University of Skopje
--             'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan
--             'openorgs____::b8b8ca674452579f3f593d9f5e557483',   -- University College Cork
--             'openorgs____::38d7097854736583dde879d12dacafca'	-- Brown University
--             'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech
--             'openorgs____::2530baca8a15936ba2e3297f2bce2e7e',	-- University of Cape Town
--             'openorgs____::d11f981828c485cd23d93f7f24f24db1',  -- Technological University Dublin
--             'openorgs____::5e6bf8962665cdd040341171e5c631d8',  -- Delft University of Technology
--             'openorgs____::846cb428d3f52a445f7275561a7beb5d',  -- University of Manitoba
--             'openorgs____::eb391317ed0dc684aa81ac16265de041',	-- Universitat Rovira i Virgili
--             'openorgs____::66aa9fc2fceb271423dfabcc38752dc0',  -- Lund University
--             'openorgs____::3cff625a4370d51e08624cc586138b2f'	-- IMT Atlantique
--        ) )) foo;
--
--ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
-
-create view if not exists TARGET.category as select * from SOURCE.category;
-create view if not exists TARGET.concept as select * from SOURCE.concept;
-create view if not exists TARGET.context as select * from SOURCE.context;
-create view if not exists TARGET.country as select * from SOURCE.country;
-create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp;
-create view if not exists TARGET.creation_date as select * from SOURCE.creation_date;
-create view if not exists TARGET.funder as select * from SOURCE.funder;
-create view if not exists TARGET.fundref as select * from SOURCE.fundref;
-create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture;
-create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure;
-create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents;
-create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers;
-create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft;
-create view if not exists TARGET.hrrst as select * from SOURCE.hrrst;
--create view if not exists TARGET.graduatedoctorates as select * from SOURCE.graduatedoctorates;
-
-create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS;
-
-create table TARGET.result_references_oc stored as parquet as select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_references_oc COMPUTE STATISTICS;
-
-create table TARGET.result_citations_oc stored as parquet as select * from SOURCE.result_citations_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_citations_oc COMPUTE STATISTICS;
-
-create table TARGET.result_classifications stored as parquet as select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_classifications COMPUTE STATISTICS;
-
-create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_apc COMPUTE STATISTICS;
-
-create table TARGET.result_concepts stored as parquet as select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_concepts COMPUTE STATISTICS;
-
-create table TARGET.result_datasources stored as parquet as select * from SOURCE.result_datasources orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_datasources COMPUTE STATISTICS;
-
-create table TARGET.result_fundercount stored as parquet as select * from SOURCE.result_fundercount orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_fundercount COMPUTE STATISTICS;
-
-create table TARGET.result_gold stored as parquet as select * from SOURCE.result_gold orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_gold COMPUTE STATISTICS;
-
-create table TARGET.result_greenoa stored as parquet as select * from SOURCE.result_greenoa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_greenoa COMPUTE STATISTICS;
-
-create table TARGET.result_languages stored as parquet as select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_languages COMPUTE STATISTICS;
-
-create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_licenses COMPUTE STATISTICS;
-
-create table TARGET.licenses_normalized STORED AS PARQUET as select * from SOURCE.licenses_normalized;
--ANALYZE TABLE TARGET.licenses_normalized COMPUTE STATISTICS;
-
-create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_oids COMPUTE STATISTICS;
-
-create table TARGET.result_organization stored as parquet as select * from SOURCE.result_organization orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_organization COMPUTE STATISTICS;
-
-create table TARGET.result_peerreviewed stored as parquet as select * from SOURCE.result_peerreviewed orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_peerreviewed COMPUTE STATISTICS;
-
-create table TARGET.result_pids stored as parquet as select * from SOURCE.result_pids orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_pids COMPUTE STATISTICS;
-
-create table TARGET.result_projectcount stored as parquet as select * from SOURCE.result_projectcount orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_projectcount COMPUTE STATISTICS;
-
-create table TARGET.result_projects stored as parquet as select * from SOURCE.result_projects orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_projects COMPUTE STATISTICS;
-
-create table TARGET.result_refereed stored as parquet as select * from SOURCE.result_refereed orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_refereed COMPUTE STATISTICS;
-
-create table TARGET.result_sources stored as parquet as select * from SOURCE.result_sources orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_sources COMPUTE STATISTICS;
-
-create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS;
-
-create table TARGET.result_fos stored as parquet as select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS;
-
-create table TARGET.result_accessroute stored as parquet as select * from SOURCE.result_accessroute orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_accessroute COMPUTE STATISTICS;
-
-create table TARGET.result_instance stored as parquet as select * from SOURCE.result_instance orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-create table TARGET.result_orcid stored as parquet as select * from SOURCE.result_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
-create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
-create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;
-drop view TARGET.foo1;
-drop view TARGET.foo2;
--ANALYZE TABLE TARGET.result_result COMPUTE STATISTICS;
-
-- datasources
-create view if not exists TARGET.datasource as select * from SOURCE.datasource;
-create view if not exists TARGET.datasource_oids as select * from SOURCE.datasource_oids;
-create view if not exists TARGET.datasource_organizations as select * from SOURCE.datasource_organizations;
-create view if not exists TARGET.datasource_sources as select * from SOURCE.datasource_sources;
-
-create table TARGET.datasource_results stored as parquet as select id as result, datasource as id from TARGET.result_datasources;
--ANALYZE TABLE TARGET.datasource_results COMPUTE STATISTICS;
-
-- organizations
-create view if not exists TARGET.organization as select * from SOURCE.organization;
-create view if not exists TARGET.organization_datasources as select * from SOURCE.organization_datasources;
-create view if not exists TARGET.organization_pids as select * from SOURCE.organization_pids;
-create view if not exists TARGET.organization_projects as select * from SOURCE.organization_projects;
-create view if not exists TARGET.organization_sources as select * from SOURCE.organization_sources;
-
-- projects
-create view if not exists TARGET.project as select * from SOURCE.project;
-create view if not exists TARGET.project_oids as select * from SOURCE.project_oids;
-create view if not exists TARGET.project_organizations as select * from SOURCE.project_organizations;
-create view if not exists TARGET.project_resultcount as select * from SOURCE.project_resultcount;
-create view if not exists TARGET.project_classification as select * from SOURCE.project_classification;
-create view if not exists TARGET.project_organization_contribution as select * from SOURCE.project_organization_contribution;
-
-create table TARGET.project_results stored as parquet as select id as result, project as id from TARGET.result_projects;
--ANALYZE TABLE TARGET.project_results COMPUTE STATISTICS;
-
-- indicators
-- Sprint 1 ----
-create table TARGET.indi_pub_green_oa stored as parquet as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_green_oa COMPUTE STATISTICS;
-create table TARGET.indi_pub_grey_lit stored as parquet as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_grey_lit COMPUTE STATISTICS;
-create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_doi_from_crossref COMPUTE STATISTICS;
-- Sprint 2 ----
-create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_result_has_cc_licence COMPUTE STATISTICS;
-create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_result_has_cc_licence_url COMPUTE STATISTICS;
-create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_has_abstract COMPUTE STATISTICS;
-create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_result_with_orcid COMPUTE STATISTICS;
---- Sprint 3 ----
-create table TARGET.indi_funded_result_with_fundref stored as parquet as select * from SOURCE.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_funded_result_with_fundref COMPUTE STATISTICS;
-create view TARGET.indi_result_org_collab as select * from SOURCE.indi_result_org_collab;
-create view TARGET.indi_result_org_country_collab as select * from SOURCE.indi_result_org_country_collab;
-create view TARGET.indi_project_collab_org as select * from SOURCE.indi_project_collab_org;
-create view TARGET.indi_project_collab_org_country as select * from SOURCE.indi_project_collab_org_country;
-create view TARGET.indi_funder_country_collab as select * from SOURCE.indi_funder_country_collab;
-create view TARGET.indi_result_country_collab as select * from SOURCE.indi_result_country_collab;
---- Sprint 4 ----
-create table TARGET.indi_pub_diamond stored as parquet as select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_diamond COMPUTE STATISTICS;
-create table TARGET.indi_pub_in_transformative stored as parquet as select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_in_transformative COMPUTE STATISTICS;
-create table TARGET.indi_pub_closed_other_open stored as parquet as select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_closed_other_open COMPUTE STATISTICS;
---- Sprint 5 ----
-create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_result_no_of_copies COMPUTE STATISTICS;
---- Sprint 6 ----
-create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS;
-create table TARGET.indi_pub_bronze_oa stored as parquet as select * from SOURCE.indi_pub_bronze_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_bronze_oa COMPUTE STATISTICS;
-create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
--ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS;
-create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
--ANALYZE TABLE TARGET.indi_pub_downloads_datasource COMPUTE STATISTICS;
-create table TARGET.indi_pub_downloads_year stored as parquet as select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
--ANALYZE TABLE TARGET.indi_pub_downloads_year COMPUTE STATISTICS;
-create table TARGET.indi_pub_downloads_datasource_year stored as parquet as select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
--ANALYZE TABLE TARGET.indi_pub_downloads_datasource_year COMPUTE STATISTICS;
---- Sprint 7 ----
-create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_gold_oa COMPUTE STATISTICS;
-create table TARGET.indi_pub_hybrid stored as parquet as select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_hybrid COMPUTE STATISTICS;
-create view TARGET.indi_org_fairness as select * from SOURCE.indi_org_fairness;
-create view TARGET.indi_org_fairness_pub_pr as select * from SOURCE.indi_org_fairness_pub_pr;
-create view TARGET.indi_org_fairness_pub_year as select * from SOURCE.indi_org_fairness_pub_year;
-create view TARGET.indi_org_fairness_pub as select * from SOURCE.indi_org_fairness_pub;
-create view TARGET.indi_org_fairness_year as select * from SOURCE.indi_org_fairness_year;
-create view TARGET.indi_org_findable_year as select * from SOURCE.indi_org_findable_year;
-create view TARGET.indi_org_findable as select * from SOURCE.indi_org_findable;
-create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess;
-create view TARGET.indi_org_openess_year as select * from SOURCE.indi_org_openess_year;
-create table TARGET.indi_pub_has_preprint stored as parquet as select * from SOURCE.indi_pub_has_preprint orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_has_preprint COMPUTE STATISTICS;
-create table TARGET.indi_pub_in_subscribed stored as parquet as select * from SOURCE.indi_pub_in_subscribed orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS;
-create table TARGET.indi_result_with_pid stored as parquet as select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS;
-create table TARGET.indi_impact_measures stored as parquet as select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS;
-create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * from SOURCE.indi_pub_interdisciplinarity orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS;
-create table TARGET.result_apc_affiliations stored as parquet as select * from SOURCE.result_apc_affiliations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_apc_affiliations COMPUTE STATISTICS;
-create table TARGET.indi_is_project_result_after stored as parquet as select * from SOURCE.indi_is_project_result_after orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
-create table TARGET.indi_is_funder_plan_s stored as parquet as select * from SOURCE.indi_is_funder_plan_s orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDBAll.sql
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDBAll.sql
@ -1,297 +0,0 @@
-drop database if exists TARGET cascade;
-create database if not exists TARGET;
-
-create view if not exists TARGET.category as select * from SOURCE.category;
-create view if not exists TARGET.concept as select * from SOURCE.concept;
-create view if not exists TARGET.context as select * from SOURCE.context;
-create view if not exists TARGET.country as select * from SOURCE.country;
-create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp;
-create view if not exists TARGET.creation_date as select * from SOURCE.creation_date;
-create view if not exists TARGET.funder as select * from SOURCE.funder;
-create view if not exists TARGET.fundref as select * from SOURCE.fundref;
-create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture;
-create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure;
-create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents;
-create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers;
-create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft;
-create view if not exists TARGET.hrrst as select * from SOURCE.hrrst;
--create view if not exists TARGET.graduatedoctorates as select * from SOURCE.graduatedoctorates;
-
-create table TARGET.result stored as parquet as
-    select distinct * from (
-        select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id)
-        union all
-        select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id)
-        union all
-        select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in (
-             'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC"
-             'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council
-             'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ??
-             'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University
-             'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade
-             'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki
-             'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho
-             'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid
-             'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen
-             'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens
-             -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot
-             'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University
-             'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark
-             'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin
-             'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt
-             'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven
-             'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape
-             'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
-             'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
-             'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
-             'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
-             'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
-             'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw
-             'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
-             'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
-             'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
-             'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras
-             'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
-             'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
-             'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech
-             'openorgs____::e15adb13c4dadd49de4d35c39b5da93a',  -- Nanyang Technological University
-             'openorgs____::4b34103bde246228fcd837f5f1bf4212',  -- Autonomous University of Barcelona
-             'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb',	-- McMaster University
-             'openorgs____::51c7fc556e46381734a25a6fbc3fd398',	-- University of Modena and Reggio Emilia
-             'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db',	-- Bilkent University
-             'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06',	-- Saints Cyril and Methodius University of Skopje
-             'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan
-             'openorgs____::b8b8ca674452579f3f593d9f5e557483',   -- University College Cork
-             'openorgs____::38d7097854736583dde879d12dacafca',	-- Brown University
-             'openorgs____::57784c9e047e826fefdb1ef816120d92',  --Arts et Métiers ParisTech
-             'openorgs____::2530baca8a15936ba2e3297f2bce2e7e',	-- University of Cape Town
-             'openorgs____::d11f981828c485cd23d93f7f24f24db1',  -- Technological University Dublin
-             'openorgs____::5e6bf8962665cdd040341171e5c631d8',  -- Delft University of Technology
-             'openorgs____::846cb428d3f52a445f7275561a7beb5d',  -- University of Manitoba
-             'openorgs____::eb391317ed0dc684aa81ac16265de041',	-- Universitat Rovira i Virgili
-             'openorgs____::66aa9fc2fceb271423dfabcc38752dc0',  -- Lund University
-             'openorgs____::3cff625a4370d51e08624cc586138b2f',	-- IMT Atlantique
-             'openorgs____::c0b262bd6eab819e4c994914f9c010e2',  -- National Institute of Geophysics and Volcanology
-             'openorgs____::1624ff7c01bb641b91f4518539a0c28a',   -- Vrije Universiteit Amsterdam
-             'openorgs____::4d4051b56708688235252f1d8fddb8c1',	-- Iscte - Instituto Universitário de Lisboa
-             'openorgs____::ab4ac74c35fa5dada770cf08e5110fab',	-- Universidade Católica Portuguesa
-             'openorgs____::4d4051b56708688235252f1d8fddb8c1',	-- Iscte - Instituto Universitário de Lisboa
-             'openorgs____::5d55fb216b14691cf68218daf5d78cd9',  -- Munster Technological University
-             'openorgs____::0fccc7640f0cb44d5cd1b06b312a06b9',  -- Cardiff University
-             'openorgs____::8839b55dae0c84d56fd533f52d5d483a',  -- Leibniz Institute of Ecological Urban and Regional Development
-             'openorgs____::526468206bca24c1c90da6a312295cf4',	-- Cyprus University of Technology
-             'openorgs____::b5ca9d4340e26454e367e2908ef3872f',	-- Alma Mater Studiorum University of Bologna
-             'openorgs____::a6340e6ecf60f6bba163659df985b0f2'	-- TU Dresden
-        ))) foo;
-
--ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
-
-create view if not exists TARGET.category as select * from SOURCE.category;
-create view if not exists TARGET.concept as select * from SOURCE.concept;
-create view if not exists TARGET.context as select * from SOURCE.context;
-create view if not exists TARGET.country as select * from SOURCE.country;
-create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp;
-create view if not exists TARGET.creation_date as select * from SOURCE.creation_date;
-create view if not exists TARGET.funder as select * from SOURCE.funder;
-create view if not exists TARGET.fundref as select * from SOURCE.fundref;
-create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture;
-create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure;
-create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents;
-create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers;
-create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft;
-create view if not exists TARGET.hrrst as select * from SOURCE.hrrst;
--create view if not exists TARGET.graduatedoctorates as select * from SOURCE.graduatedoctorates;
-
-create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS;
-
-create table TARGET.result_references_oc stored as parquet as select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_references_oc COMPUTE STATISTICS;
-
-create table TARGET.result_citations_oc stored as parquet as select * from SOURCE.result_citations_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_citations_oc COMPUTE STATISTICS;
-
-create table TARGET.result_classifications stored as parquet as select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_classifications COMPUTE STATISTICS;
-
-create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_apc COMPUTE STATISTICS;
-
-create table TARGET.result_concepts stored as parquet as select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_concepts COMPUTE STATISTICS;
-
-create table TARGET.result_datasources stored as parquet as select * from SOURCE.result_datasources orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_datasources COMPUTE STATISTICS;
-
-create table TARGET.result_fundercount stored as parquet as select * from SOURCE.result_fundercount orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_fundercount COMPUTE STATISTICS;
-
-create table TARGET.result_gold stored as parquet as select * from SOURCE.result_gold orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_gold COMPUTE STATISTICS;
-
-create table TARGET.result_greenoa stored as parquet as select * from SOURCE.result_greenoa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_greenoa COMPUTE STATISTICS;
-
-create table TARGET.result_languages stored as parquet as select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_languages COMPUTE STATISTICS;
-
-create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_licenses COMPUTE STATISTICS;
-
-create table TARGET.licenses_normalized STORED AS PARQUET as select * from SOURCE.licenses_normalized;
--ANALYZE TABLE TARGET.licenses_normalized COMPUTE STATISTICS;
-
-create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_oids COMPUTE STATISTICS;
-
-create table TARGET.result_organization stored as parquet as select * from SOURCE.result_organization orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_organization COMPUTE STATISTICS;
-
-create table TARGET.result_peerreviewed stored as parquet as select * from SOURCE.result_peerreviewed orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_peerreviewed COMPUTE STATISTICS;
-
-create table TARGET.result_pids stored as parquet as select * from SOURCE.result_pids orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_pids COMPUTE STATISTICS;
-
-create table TARGET.result_projectcount stored as parquet as select * from SOURCE.result_projectcount orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_projectcount COMPUTE STATISTICS;
-
-create table TARGET.result_projects stored as parquet as select * from SOURCE.result_projects orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_projects COMPUTE STATISTICS;
-
-create table TARGET.result_refereed stored as parquet as select * from SOURCE.result_refereed orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_refereed COMPUTE STATISTICS;
-
-create table TARGET.result_sources stored as parquet as select * from SOURCE.result_sources orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_sources COMPUTE STATISTICS;
-
-create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS;
-
-create table TARGET.result_fos stored as parquet as select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS;
-
-create table TARGET.result_accessroute stored as parquet as select * from SOURCE.result_accessroute orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_accessroute COMPUTE STATISTICS;
-
-create table TARGET.result_instance stored as parquet as select * from SOURCE.result_instance orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-create table TARGET.result_orcid stored as parquet as select * from SOURCE.result_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-
-create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
-create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
-create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;
-drop view TARGET.foo1;
-drop view TARGET.foo2;
--ANALYZE TABLE TARGET.result_result COMPUTE STATISTICS;
-
-- datasources
-create view if not exists TARGET.datasource as select * from SOURCE.datasource;
-create view if not exists TARGET.datasource_oids as select * from SOURCE.datasource_oids;
-create view if not exists TARGET.datasource_organizations as select * from SOURCE.datasource_organizations;
-create view if not exists TARGET.datasource_sources as select * from SOURCE.datasource_sources;
-
-create table TARGET.datasource_results stored as parquet as select id as result, datasource as id from TARGET.result_datasources;
--ANALYZE TABLE TARGET.datasource_results COMPUTE STATISTICS;
-
-- organizations
-create view if not exists TARGET.organization as select * from SOURCE.organization;
-create view if not exists TARGET.organization_datasources as select * from SOURCE.organization_datasources;
-create view if not exists TARGET.organization_pids as select * from SOURCE.organization_pids;
-create view if not exists TARGET.organization_projects as select * from SOURCE.organization_projects;
-create view if not exists TARGET.organization_sources as select * from SOURCE.organization_sources;
-
-- projects
-create view if not exists TARGET.project as select * from SOURCE.project;
-create view if not exists TARGET.project_oids as select * from SOURCE.project_oids;
-create view if not exists TARGET.project_organizations as select * from SOURCE.project_organizations;
-create view if not exists TARGET.project_resultcount as select * from SOURCE.project_resultcount;
-create view if not exists TARGET.project_classification as select * from SOURCE.project_classification;
-create view if not exists TARGET.project_organization_contribution as select * from SOURCE.project_organization_contribution;
-
-create table TARGET.project_results stored as parquet as select id as result, project as id from TARGET.result_projects;
--ANALYZE TABLE TARGET.project_results COMPUTE STATISTICS;
-
-- indicators
-- Sprint 1 ----
-create table TARGET.indi_pub_green_oa stored as parquet as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_green_oa COMPUTE STATISTICS;
-create table TARGET.indi_pub_grey_lit stored as parquet as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_grey_lit COMPUTE STATISTICS;
-create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_doi_from_crossref COMPUTE STATISTICS;
-- Sprint 2 ----
-create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_result_has_cc_licence COMPUTE STATISTICS;
-create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_result_has_cc_licence_url COMPUTE STATISTICS;
-create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_has_abstract COMPUTE STATISTICS;
-create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_result_with_orcid COMPUTE STATISTICS;
---- Sprint 3 ----
-create table TARGET.indi_funded_result_with_fundref stored as parquet as select * from SOURCE.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_funded_result_with_fundref COMPUTE STATISTICS;
-create view TARGET.indi_result_org_collab as select * from SOURCE.indi_result_org_collab;
-create view TARGET.indi_result_org_country_collab as select * from SOURCE.indi_result_org_country_collab;
-create view TARGET.indi_project_collab_org as select * from SOURCE.indi_project_collab_org;
-create view TARGET.indi_project_collab_org_country as select * from SOURCE.indi_project_collab_org_country;
-create view TARGET.indi_funder_country_collab as select * from SOURCE.indi_funder_country_collab;
-create view TARGET.indi_result_country_collab as select * from SOURCE.indi_result_country_collab;
---- Sprint 4 ----
-create table TARGET.indi_pub_diamond stored as parquet as select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_diamond COMPUTE STATISTICS;
-create table TARGET.indi_pub_in_transformative stored as parquet as select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_in_transformative COMPUTE STATISTICS;
-create table TARGET.indi_pub_closed_other_open stored as parquet as select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_closed_other_open COMPUTE STATISTICS;
---- Sprint 5 ----
-create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_result_no_of_copies COMPUTE STATISTICS;
---- Sprint 6 ----
-create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS;
-create table TARGET.indi_pub_bronze_oa stored as parquet as select * from SOURCE.indi_pub_bronze_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_bronze_oa COMPUTE STATISTICS;
-create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
--ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS;
-create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
--ANALYZE TABLE TARGET.indi_pub_downloads_datasource COMPUTE STATISTICS;
-create table TARGET.indi_pub_downloads_year stored as parquet as select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
--ANALYZE TABLE TARGET.indi_pub_downloads_year COMPUTE STATISTICS;
-create table TARGET.indi_pub_downloads_datasource_year stored as parquet as select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
--ANALYZE TABLE TARGET.indi_pub_downloads_datasource_year COMPUTE STATISTICS;
---- Sprint 7 ----
-create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_gold_oa COMPUTE STATISTICS;
-create table TARGET.indi_pub_hybrid stored as parquet as select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_hybrid COMPUTE STATISTICS;
-create view TARGET.indi_org_fairness as select * from SOURCE.indi_org_fairness;
-create view TARGET.indi_org_fairness_pub_pr as select * from SOURCE.indi_org_fairness_pub_pr;
-create view TARGET.indi_org_fairness_pub_year as select * from SOURCE.indi_org_fairness_pub_year;
-create view TARGET.indi_org_fairness_pub as select * from SOURCE.indi_org_fairness_pub;
-create view TARGET.indi_org_fairness_year as select * from SOURCE.indi_org_fairness_year;
-create view TARGET.indi_org_findable_year as select * from SOURCE.indi_org_findable_year;
-create view TARGET.indi_org_findable as select * from SOURCE.indi_org_findable;
-create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess;
-create view TARGET.indi_org_openess_year as select * from SOURCE.indi_org_openess_year;
-create table TARGET.indi_pub_has_preprint stored as parquet as select * from SOURCE.indi_pub_has_preprint orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_has_preprint COMPUTE STATISTICS;
-create table TARGET.indi_pub_in_subscribed stored as parquet as select * from SOURCE.indi_pub_in_subscribed orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS;
-create table TARGET.indi_result_with_pid stored as parquet as select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS;
-create table TARGET.indi_impact_measures stored as parquet as select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS;
-create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * from SOURCE.indi_pub_interdisciplinarity orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS;
-create table TARGET.result_apc_affiliations stored as parquet as select * from SOURCE.result_apc_affiliations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--ANALYZE TABLE TARGET.result_apc_affiliations COMPUTE STATISTICS;
--create table TARGET.indi_is_project_result_after stored as parquet as select * from SOURCE.indi_is_project_result_after orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--create table TARGET.indi_is_funder_plan_s stored as parquet as select * from SOURCE.indi_is_funder_plan_s orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--create view TARGET.indi_funder_fairness as select * from SOURCE.indi_funder_fairness;
--create view TARGET.indi_funder_openess as select * from SOURCE.indi_funder_openess;
--create view TARGET.indi_funder_findable as select * from SOURCE.indi_funder_findable;
--create view TARGET.indi_ris_fairness as select * from SOURCE.indi_ris_fairness;
--create view TARGET.indi_ris_openess as select * from SOURCE.indi_ris_openess;
--create view TARGET.indi_ris_findable as select * from SOURCE.indi_ris_findable;
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB_institutions.sql
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB_institutions.sql
@ -1,67 +0,0 @@
-drop database if exists TARGET cascade;
-create database if not exists TARGET;
-
-create table TARGET.result stored as parquet as
-    select distinct * from (
-        select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in (
-             'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC"
-             'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council
-             'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ??
-             'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University
-             'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade
-             'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki
-             'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho
-             'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid
-             'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen
-             'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens
-             -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot
-             'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University
-             'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark
-             'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin
-             'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt
-             'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven
-             'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape
-             'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
-             'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
-             'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
-             'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
-             'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
-             'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw
-             'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
-             'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
-             'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
-             'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras
-             'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
-             'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
-             'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech
-             'openorgs____::e15adb13c4dadd49de4d35c39b5da93a',  -- Nanyang Technological University
-             'openorgs____::4b34103bde246228fcd837f5f1bf4212',  -- Autonomous University of Barcelona
-             'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb',	-- McMaster University
-             'openorgs____::51c7fc556e46381734a25a6fbc3fd398',	-- University of Modena and Reggio Emilia
-             'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db',	-- Bilkent University
-             'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06',	-- Saints Cyril and Methodius University of Skopje
-             'openorgs____::db7686f30f22cbe73a4fde872ce812a6',  -- University of Milan
-             'openorgs____::b8b8ca674452579f3f593d9f5e557483',   -- University College Cork
-             'openorgs____::38d7097854736583dde879d12dacafca',	-- Brown University
-             'openorgs____::57784c9e047e826fefdb1ef816120d92',  --Arts et Métiers ParisTech
-             'openorgs____::2530baca8a15936ba2e3297f2bce2e7e',	-- University of Cape Town
-             'openorgs____::d11f981828c485cd23d93f7f24f24db1',  -- Technological University Dublin
-             'openorgs____::5e6bf8962665cdd040341171e5c631d8',  -- Delft University of Technology
-             'openorgs____::846cb428d3f52a445f7275561a7beb5d',  -- University of Manitoba
-             'openorgs____::eb391317ed0dc684aa81ac16265de041',	-- Universitat Rovira i Virgili
-             'openorgs____::66aa9fc2fceb271423dfabcc38752dc0',  -- Lund University
-             'openorgs____::3cff625a4370d51e08624cc586138b2f',	-- IMT Atlantique
-             'openorgs____::c0b262bd6eab819e4c994914f9c010e2',   -- National Institute of Geophysics and Volcanology
-             'openorgs____::1624ff7c01bb641b91f4518539a0c28a',   -- Vrije Universiteit Amsterdam
-             'openorgs____::4d4051b56708688235252f1d8fddb8c1',	 --Iscte - Instituto Universitário de Lisboa
-             'openorgs____::ab4ac74c35fa5dada770cf08e5110fab',	-- Universidade Católica Portuguesa
-             'openorgs____::4d4051b56708688235252f1d8fddb8c1',	-- Iscte - Instituto Universitário de Lisboa
-             'openorgs____::5d55fb216b14691cf68218daf5d78cd9',  -- Munster Technological University
-             'openorgs____::0fccc7640f0cb44d5cd1b06b312a06b9',  -- Cardiff University
-             'openorgs____::8839b55dae0c84d56fd533f52d5d483a',  -- Leibniz Institute of Ecological Urban and Regional Development
-             'openorgs____::526468206bca24c1c90da6a312295cf4',	-- Cyprus University of Technology
-             'openorgs____::b5ca9d4340e26454e367e2908ef3872f',	-- Alma Mater Studiorum University of Bologna
-             'openorgs____::a6340e6ecf60f6bba163659df985b0f2'	-- TU Dresden
-        )))  foo;
-
--ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/workflow.xml
@ -1,111 +0,0 @@
-<workflow-app name="Stats Monitor Update" xmlns="uri:oozie:workflow:0.5">
-    <parameters>
-        <property>
-            <name>stats_db_name</name>
-            <description>the target stats database name</description>
-        </property>
-        <property>
-            <name>monitor_db_name</name>
-            <description>the target monitor db name</description>
-        </property>
-        <property>
-            <name>monitor_db_shadow_name</name>
-            <description>the name of the shadow monitor db</description>
-        </property>
-        <property>
-            <name>hive_metastore_uris</name>
-            <description>hive server metastore URIs</description>
-        </property>
-        <property>
-            <name>hive_jdbc_url</name>
-            <description>hive server jdbc url</description>
-        </property>
-        <property>
-            <name>hive_timeout</name>
-            <description>the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a hearbeat. The default value is 300 seconds.</description>
-        </property>
-        <property>
-            <name>hadoop_user_name</name>
-            <description>user name of the wf owner</description>
-        </property>
-    </parameters>
-
-    <global>
-        <job-tracker>${jobTracker}</job-tracker>
-        <name-node>${nameNode}</name-node>
-        <configuration>
-            <property>
-                <name>hive.metastore.uris</name>
-                <value>${hive_metastore_uris}</value>
-            </property>
-            <property>
-            	<name>hive.txn.timeout</name>
-            	<value>${hive_timeout}</value>
-            </property>
-	<property>
-	    <name>mapred.job.queue.name</name>
-	    <value>analytics</value>
-	</property>
-        </configuration>
-    </global>
-
-    <start to="resume_from"/>
-    <decision name="resume_from">
-        <switch>
-            <case to="Step1-updateMonitorDB">${wf:conf('resumeFrom') eq 'Step1-updateMonitorDB'}</case>
-            <case to="Step2-copyDataToImpalaCluster">${wf:conf('resumeFrom') eq 'Step2-copyDataToImpalaCluster'}</case>
-            <case to="Step3-finalizeImpalaCluster">${wf:conf('resumeFrom') eq 'Step3-finalizeImpalaCluster'}</case>
-            <default to="Step1-updateMonitorDB"/>
-        </switch>
-    </decision>
-
-    <kill name="Kill">
-        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-    </kill>
-
-    <action name="Step1-updateMonitorDB">
-        <shell xmlns="uri:oozie:shell-action:0.1">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
-            <exec>monitor.sh</exec>
-            <argument>${stats_db_name}</argument>
-            <argument>${monitor_db_name}</argument>
-            <argument>${monitor_db_shadow_name}</argument>
-            <argument>${wf:appPath()}/scripts/updateMonitorDB_institutions.sql</argument>
-            <argument>${wf:appPath()}/scripts/updateMonitorDB.sql</argument>
-            <argument>${wf:appPath()}/scripts/updateMonitorDBAll.sql</argument>
-            <file>monitor.sh</file>
-        </shell>
-        <ok to="End"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="Step2-copyDataToImpalaCluster">
-        <shell xmlns="uri:oozie:shell-action:0.1">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
-            <exec>copyDataToImpalaCluster.sh</exec>
-            <argument>${monitor_db_name}</argument>
-            <argument>${hadoop_user_name}</argument>
-            <file>copyDataToImpalaCluster.sh</file>
-        </shell>
-        <ok to="End"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="Step3-finalizeImpalaCluster">
-        <shell xmlns="uri:oozie:shell-action:0.1">
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
-            <exec>finalizeImpalaCluster.sh</exec>
-            <argument>${monitor_db_name}</argument>
-            <argument>${monitor_db_prod_name}</argument>
-            <argument>${monitor_db_shadow_name}</argument>
-            <file>finalizeImpalaCluster.sh</file>
-        </shell>
-        <ok to="End"/>
-        <error to="Kill"/>
-    </action>
-
-    <end name="End"/>
-</workflow-app>
--- a/dhp-workflows/dhp-stats-update/pom.xml
+++ b/dhp-workflows/dhp-stats-update/pom.xml
@ -8,11 +8,6 @@
    <modelVersion>4.0.0</modelVersion>
    <artifactId>dhp-stats-update</artifactId>
    <dependencies>
-        <dependency>
-            <groupId>eu.dnetlib.dhp</groupId>
-            <artifactId>dhp-common</artifactId>
-            <version>${project.version}</version>
-        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.binary.version}</artifactId>
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh
@ -35,20 +35,12 @@ export HADOOP_USER="oozie"
 export HADOOP_USER_NAME="oozie"

 echo "Creating and populating impala tables"
-hive $HIVE_OPTS -e "create table ${TARGET_DB}.context_csv (id string, name string) row format delimited fields terminated by ','"
-hive $HIVE_OPTS -e "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context_csv"
-hive $HIVE_OPTS -e "create table ${TARGET_DB}.context stored as parquet as select * from ${TARGET_DB}.context_csv"
-hive $HIVE_OPTS -e "drop table ${TARGET_DB}.context_csv purge"
-
-hive $HIVE_OPTS -e "create table ${TARGET_DB}.category_csv (context string, id string, name string) row format delimited fields terminated by ','"
-hive $HIVE_OPTS -e "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category_csv"
-hive $HIVE_OPTS -e "create table ${TARGET_DB}.category stored as parquet as select * from ${TARGET_DB}.category_csv"
-hive $HIVE_OPTS -e "drop table ${TARGET_DB}.category_csv purge"
-
-hive $HIVE_OPTS -e "create table ${TARGET_DB}.concept_csv (category string, id string, name string) row format delimited fields terminated by ','"
-hive $HIVE_OPTS -e "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept_csv"
-hive $HIVE_OPTS -e "create table ${TARGET_DB}.concept stored as parquet as select * from ${TARGET_DB}.concept_csv"
-hive $HIVE_OPTS -e "drop table ${TARGET_DB}.concept_csv purge"
+hive $HIVE_OPTS -e "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ','"
+hive $HIVE_OPTS -e "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ','"
+hive $HIVE_OPTS -e "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ','"
+hive $HIVE_OPTS -e "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context"
+hive $HIVE_OPTS -e "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category"
+hive $HIVE_OPTS -e "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept"

 echo "Cleaning up"
 rm concepts.csv
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
@ -6,215 +6,68 @@ then
    ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
 fi

-
-# Set the active HDFS node of OCEAN and IMPALA cluster.
-OCEAN_HDFS_NODE='hdfs://nameservice1'
-echo -e "\nOCEAN HDFS virtual-name which resolves automatically to the active-node: ${OCEAN_HDFS_NODE}"
-
-IMPALA_HDFS_NODE=''
-COUNTER=0
-while [ $COUNTER -lt 3 ]; do
-  if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu/tmp >/dev/null 2>&1; then
-      IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020'
-      break
-  elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu/tmp >/dev/null 2>&1; then
-      IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020'
-      break
-  else
-      IMPALA_HDFS_NODE=''
-      sleep 1
-  fi
-  ((COUNTER++))
-done
-if [ -z "$IMPALA_HDFS_NODE" ]; then
-    echo -e "\n\nERROR: PROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER! | AFTER ${COUNTER} RETRIES.\n\n"
-    exit 1
-fi
-echo -e "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE} , after ${COUNTER} retries.\n\n"
-
-IMPALA_HOSTNAME='impala-cluster-dn1.openaire.eu'
-IMPALA_CONFIG_FILE='/etc/impala_cluster/hdfs-site.xml'
-
-IMPALA_HDFS_DB_BASE_PATH="${IMPALA_HDFS_NODE}/user/hive/warehouse"
-
-# Set sed arguments.
-LOCATION_HDFS_NODE_SED_ARG="s|${OCEAN_HDFS_NODE}|${IMPALA_HDFS_NODE}|g" # This requires to be used with "sed -e" in order to have the "|" delimiter (as the "/" conflicts with the URIs)
-
-# Set the SED command arguments for column-names with reserved words:
-DATE_SED_ARG_1='s/[[:space:]]\date[[:space:]]/\`date\`/g'
-DATE_SED_ARG_2='s/\.date,/\.\`date\`,/g'  # the "date" may be part of a larger field name like "datestamp" or "date_aggregated", so we need to be careful with what we are replacing.
-DATE_SED_ARG_3='s/\.date[[:space:]]/\.\`date\` /g'
-
-HASH_SED_ARG_1='s/[[:space:]]\hash[[:space:]]/\`hash\`/g'
-HASH_SED_ARG_2='s/\.hash,/\.\`hash\`,/g'
-HASH_SED_ARG_3='s/\.hash[[:space:]]/\.\`hash\` /g'
-
-LOCATION_SED_ARG_1='s/[[:space:]]\location[[:space:]]/\`location\`/g'
-LOCATION_SED_ARG_2='s/\.location,/\.\`location\`,/g'
-LOCATION_SED_ARG_3='s/\.location[[:space:]]/\.\`location\` /g'
-
-
 export HADOOP_USER_NAME=$6
 export PROD_USAGE_STATS_DB="openaire_prod_usage_stats"
-
-
 function copydb() {
  db=$1
-  echo -e "\nStart processing db: '${db}'..\n"
+  FILE=("hive_wf_tmp_"$RANDOM)
+  hdfs dfs -mkdir hdfs://impala-cluster-mn1.openaire.eu:8020/tmp/$FILE/
+  # copy the databases from ocean to impala

-  # Delete the old DB from Impala cluster (if exists).
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
-  log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
-  if [ -n "$log_errors" ]; then
-    echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN DROPPING THE OLD DATABASE! EXITING...\n\n"
-    rm -f error.log
-    return 1
-  fi
+  echo "copying $db"
+  hadoop distcp -Dmapreduce.map.memory.mb=6144 -pb hdfs://nameservice1/user/hive/warehouse/${db}.db hdfs://impala-cluster-mn1.openaire.eu:8020/tmp/$FILE/

-  # Make Impala aware of the deletion of the old DB immediately.
-  sleep 1
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
+  # open up permissions (chmod 777, not a chown) on the copied files so they can be loaded on the impala cluster
+  hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -chmod -R 777 /tmp/$FILE/${db}.db

-  echo -e "\n\nCopying files of '${db}', from Ocean to Impala cluster..\n"
-  # Using max-bandwidth of: 50 * 100 Mb/s = 5 Gb/s
-  # Using max memory of: 50 * 6144 = 300 Gb
-  # Using 1MB as a buffer-size.
-  # The " -Ddistcp.dynamic.recordsPerChunk=50" arg is not available in our version of hadoop
-  # The "ug" args cannot be used as we get a "User does not belong to hive" error.
-  # The "p" argument cannot be used, as it blocks the files from being used, giving a "sticky bit"-error, even after applying chmod and chown onm the files.
-  hadoop distcp -Dmapreduce.map.memory.mb=6144 -m 70 -bandwidth 150 \
-                -numListstatusThreads 40 \
-                -copybuffersize 1048576 \
-                -strategy dynamic \
-                -pb \
-                ${OCEAN_HDFS_NODE}/user/hive/warehouse/${db}.db ${IMPALA_HDFS_DB_BASE_PATH}
-
-  # Check the exit status of the "hadoop distcp" command.
-  if [ $? -eq 0 ]; then
-    echo -e "\nSuccessfully copied the files of '${db}'.\n"
-  else
-    echo -e "\n\nERROR: FAILED TO TRANSFER THE FILES OF '${db}', WITH 'hadoop distcp'. GOT WITH EXIT STATUS: $?\n\n"
-    rm -f error.log
-    return 2
-  fi
-
-  # In case we ever use this script for a writable DB (using inserts/updates), we should perform the following costly operation as well..
-  #hdfs dfs -conf ${IMPALA_CONFIG_FILE} -chmod -R 777 ${TEMP_SUBDIR_FULLPATH}/${db}.db
-
-  echo -e "\nCreating schema for db: '${db}'\n"
-
-  # create the new database (with the same name)
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create database ${db}"
-
-  # Make Impala aware of the creation of the new DB immediately.
-  sleep 1
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-  sleep 1
-  # Because "Hive" and "Impala" do not have compatible schemas, we cannot use the "show create table <name>" output from hive to create the exact same table in impala.
-  # So, we have to find at least one parquet file (check if it's there) from the table in the ocean cluster for impala to use it to extract the table-schema itself from that file.
-
-  all_create_view_statements=()
-
-  entities_on_ocean=`hive -e "show tables in ${db};" | sed 's/WARN:.*//g'`  # Get the tables and views without any potential the "WARN" logs.
-  for i in ${entities_on_ocean[@]}; do # Use un-quoted values, as the elemetns are single-words.
-    # Check if this is a view by showing the create-statement where it should print "create view" for a view, not the "create table". Unfortunately, there is no "show views" command.
-    create_entity_statement=`hive -e "show create table ${db}.${i};"` # It needs to happen in two stages, otherwise the "grep" is not able to match multi-line statement.
-
-    create_view_statement_test=`echo -e "$create_entity_statement" | grep 'CREATE VIEW'`
-    if [ -n "$create_view_statement_test" ]; then
-      echo -e "\n'${i}' is a view, so we will save its 'create view' statement and execute it on Impala, after all tables have been created.\n"
-      create_view_statement=`echo -e "$create_entity_statement" | sed 's/WARN:.*//g' | sed 's/\`//g' \
-        | sed 's/"$/;/' | sed 's/^"//' | sed 's/\\"\\"/\"/g' | sed -e "${LOCATION_HDFS_NODE_SED_ARG}" | sed "${DATE_SED_ARG_1}" | sed "${HASH_SED_ARG_1}" | sed "${LOCATION_SED_ARG_1}" \
-        | sed "${DATE_SED_ARG_2}" | sed "${HASH_SED_ARG_2}" | sed "${LOCATION_SED_ARG_2}" \
-        | sed "${DATE_SED_ARG_3}" | sed "${HASH_SED_ARG_3}" | sed "${LOCATION_SED_ARG_3}"`
-      all_create_view_statements+=("$create_view_statement")
-    else
-      echo -e "\n'${i}' is a table, so we will check for its parquet files and create the table on Impala cluster.\n"
-      CURRENT_PRQ_FILE=`hdfs dfs -conf ${IMPALA_CONFIG_FILE} -ls -C "${IMPALA_HDFS_DB_BASE_PATH}/${db}.db/${i}/" | grep -v 'Found' | grep -v '_impala_insert_staging' |  head -1`
-      if [ -z "$CURRENT_PRQ_FILE" ]; then # If there is not parquet-file inside.
-          echo -e "\nERROR: THE TABLE \"${i}\" HAD NO FILES TO GET THE SCHEMA FROM! IT'S EMPTY!\n\n"
-      else
-        impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
-        log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
-        if [ -n "$log_errors" ]; then
-          echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN CREATING TABLE '${i}'!\n\n"
-        fi
-      fi
-    fi
-  done
-
-  echo -e "\nAll tables have been created, going to create the views..\n"
-
-  # Time to loop through the views and create them.
-  # At this point all table-schemas should have been created.
-
-  previous_num_of_views_to_retry=${#all_create_view_statements}
-  if [[ $previous_num_of_views_to_retry -gt 0 ]]; then
-    echo -e "\nAll_create_view_statements:\n\n${all_create_view_statements[@]}\n"  # DEBUG
-    # Make Impala aware of the new tables, so it knows them when creating the views.
-    sleep 1
-    impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-    sleep 1
-  else
-    echo -e "\nDB '${db}' does not contain any views.\n"
-  fi
-
-  level_counter=0
-  while [[ ${#all_create_view_statements[@]} -gt 0 ]]; do
-    ((level_counter++))
-    # The only accepted reason for a view to not be created, is if it depends on another view, which has not been created yet.
-    # In this case, we should retry creating this particular view again.
-    should_retry_create_view_statements=()
-
-    for create_view_statement in "${all_create_view_statements[@]}"; do # Here we use double quotes, as the elements are phrases, instead of single-words.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
-      specific_errors=`cat error.log | grep -E "FAILED: ParseException line 1:13 missing TABLE at 'view'|ERROR: AnalysisException: Could not resolve table reference:"`
-      if [ -n "$specific_errors" ]; then
-        echo -e "\nspecific_errors: ${specific_errors}\n"
-        echo -e "\nView '$(cat error.log | grep "CREATE VIEW " | sed 's/CREATE VIEW //g' | sed 's/ as select .*//g')' failed to be created, possibly because it depends on another view.\n"
-        should_retry_create_view_statements+=("$create_view_statement")
-      else
-          sleep 1 # Wait a bit for Impala to register that the view was created, before possibly referencing it by another view.
-      fi
+  # drop tables from db (each statement is run directly, not inside backticks, so its output is not re-executed by the shell)
+  for i in `impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -d ${db} --delimited  -q "show tables"`;
+    do
+        impala-shell  -i impala-cluster-dn1.openaire.eu -d ${db} -q "drop table $i;";
    done

-    new_num_of_views_to_retry=${#should_retry_create_view_statements}
-    if [[ $new_num_of_views_to_retry -eq $previous_num_of_views_to_retry ]]; then
-      echo -e "\n\nERROR: THE NUMBER OF VIEWS TO RETRY HAS NOT BEEN REDUCED! THE SCRIPT IS LIKELY GOING TO AN INFINITE-LOOP! EXITING..\n\n"
-      return 3
-    elif [[ $new_num_of_views_to_retry -gt 0 ]]; then
-      echo -e "\nTo be retried \"create_view_statements\":\n\n${should_retry_create_view_statements[@]}\n"
-      previous_num_of_views_to_retry=$new_num_of_views_to_retry
-    else
-      echo -e "\nFinished creating views, for db: '${db}', in level-${level_counter}.\n"
-    fi
-    all_create_view_statements=("${should_retry_create_view_statement[@]}") # This is needed in any case to either move forward with the rest of the views or stop at 0 remaining views.
+  # drop views from db (each statement is run directly, not inside backticks, so its output is not re-executed by the shell)
+  for i in `impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -d ${db} --delimited  -q "show tables"`;
+    do
+        impala-shell  -i impala-cluster-dn1.openaire.eu -d ${db} -q "drop view $i;";
+    done
+
+  # delete the database
+  impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -q "drop database if exists ${db} cascade";
+
+  # create the databases
+  impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -q "create database ${db}";
+
+  impala-shell --user $HADOOP_USER_NAME -q "INVALIDATE METADATA"
+  echo "creating schema for ${db}"
+  for ((  k  = 0;  k  < 5;  k ++ )); do
+  for i in `impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited  -q "show tables"`;
+    do
+      impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited  -q "show create table $i";
+    done |  sed 's/"$/;/' | sed 's/^"//' | sed 's/[[:space:]]\date[[:space:]]/`date`/g' | impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -c -f -
  done

-  sleep 1
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA"
-  sleep 1
+#  for i in `impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited  -q "show tables"`;
+#    do
+#      impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited  -q "show create table $i";
+#    done |  sed 's/"$/;/' | sed 's/^"//' | sed 's/[[:space:]]\date[[:space:]]/`date`/g' | impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -c -f -
+#
+#  # run the same command twice because we may have failures in the first run (due to views pointing to the same db)
+#  for i in `impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited  -q "show tables"`;
+#    do
+#      impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited  -q "show create table $i";
+#    done |  sed 's/"$/;/' | sed 's/^"//' | sed 's/[[:space:]]\date[[:space:]]/`date`/g' | impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -c -f -

-  echo -e "\nComputing stats for tables..\n"
-  entities_on_impala=`impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`
-  for i in ${entities_on_impala[@]}; do # Use un-quoted values, as the elemetns are single-words.
-    # Taking the create table statement from the Ocean cluster, just to check if its a view, as the output is easier than using impala-shell from Impala cluster.
-    create_view_statement=`hive -e "show create table ${db}.${i};" | grep "CREATE VIEW"`  # This grep works here, as we do not want to match multiple-lines.
-    if [ -z "$create_view_statement" ]; then  # If it's a table, then go load the data to it.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}";
-    fi
-  done
+  # load the data from /tmp in the respective tables
+  echo "copying data in tables and computing stats"
+  for i in `impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -d ${db} --delimited  -q "show tables"`;
+      do
+        impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -d ${db} -q "load data inpath '/tmp/$FILE/${db}.db/$i' into table $i";
+        impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -d ${db} -q "compute stats $i";
+      done

-  if [ "${entities_on_impala[@]}" == "${entities_on_ocean[@]}" ]; then
-    echo -e "\nAll entities have been copied to Impala cluster.\n"
-  else
-    echo -e "\n\nERROR: 1 OR MORE ENTITIES OF DB '${db}' FAILED TO BE COPIED TO IMPALA CLUSTER!\n\n"
-    rm -f error.log
-    return 4
-  fi
-
-  rm -f error.log
-  echo -e "\n\nFinished processing db: ${db}\n\n"
+  # delete the whole temporary staging directory created at the start of copydb, not only its ${db}.db subdirectory
+  hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -rm -R /tmp/$FILE
 }

 STATS_DB=$1
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh
@ -85,12 +85,12 @@ hive $HIVE_OPTS --database ${2}_funded -e "show tables" | grep -v WARN | sed "s/
 hive -f foo
 echo "Updated shadow monitor funded database"

-echo "Updating shadow monitor institutions database"
+echo "Updating shadow monitor institutions database"
 hive -e "drop database if exists ${SHADOW}_institutions cascade"
 hive -e "create database if not exists ${SHADOW}_institutions"
 hive $HIVE_OPTS --database ${2}_institutions -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_institutions.\1 as select * from ${2}_institutions.\1;/" > foo
 hive -f foo
-echo "Shadow db monitor institutions ready!"
+echo "Shadow db monitor institutions ready!"

 echo "Updating shadow monitor RIs database"
 for i in $contexts
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql
@ -69,7 +69,7 @@ SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
 DROP TABLE IF EXISTS ${stats_db_name}.result_orcid purge;

 CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_orcid STORED AS PARQUET as
-select distinct res.id, upper(regexp_replace(res.orcid, 'http://orcid.org/' ,'')) as orcid
+select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid
 from (
    SELECT substr(res.id, 4) as id, auth_pid.value as orcid
    FROM ${openaire_db_name}.result res
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql
@ -7,76 +7,32 @@
 ------------------------------------------------------

 DROP TABLE IF EXISTS ${stats_db_name}.publication_refereed purge;
+
 CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as
-with peer_reviewed as (
-    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-    from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
-    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
-non_peer_reviewed as (
-    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-    from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
-    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
-select distinct *
-from (
-    select peer_reviewed.* from peer_reviewed
-    union all
-    select non_peer_reviewed.* from non_peer_reviewed
-    left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
-    where peer_reviewed.id is null) pr;
+select substr(r.id, 4) as id, inst.refereed.classname as refereed
+from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
+where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;

 DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed purge;
+
 CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
-with peer_reviewed as (
-    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-    from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
-    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
-non_peer_reviewed as (
-    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-    from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
-    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
-select distinct *
-from (
-    select peer_reviewed.* from peer_reviewed
-    union all
-    select non_peer_reviewed.* from non_peer_reviewed
-    left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
-    where peer_reviewed.id is null) pr;
+select substr(r.id, 4) as id, inst.refereed.classname as refereed
+from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
+where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;

 DROP TABLE IF EXISTS ${stats_db_name}.software_refereed purge;
+
 CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
-with peer_reviewed as (
-    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-    from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
-    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
-non_peer_reviewed as (
-    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-    from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
-    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
-select distinct *
-from (
-    select peer_reviewed.* from peer_reviewed
-    union all
-    select non_peer_reviewed.* from non_peer_reviewed
-    left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
-    where peer_reviewed.id is null) pr;
+select substr(r.id, 4) as id, inst.refereed.classname as refereed
+from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
+where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;

 DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed purge;
+
 CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
-with peer_reviewed as (
-    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-    from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
-    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
-non_peer_reviewed as (
-    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-    from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
-    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
-select distinct *
-from (
-    select peer_reviewed.* from peer_reviewed
-    union all
-    select non_peer_reviewed.* from non_peer_reviewed
-    left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
-    where peer_reviewed.id is null) pr;
+select substr(r.id, 4) as id, inst.refereed.classname as refereed
+from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
+where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;

 CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
 select * from ${stats_db_name}.publication_refereed
@ -104,4 +60,4 @@ rel.properties[1].value apc_currency
 from ${openaire_db_name}.relation rel
 join ${openaire_db_name}.organization o on o.id=rel.source
 join ${openaire_db_name}.result r on r.id=rel.target
-where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0;
+where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0;
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
@ -50,13 +50,13 @@ select distinct r.*
 from (
         select substr(r.id, 4) as id, inst.accessright.classname as accessright, inst.accessright.openaccessroute as accessright_uw, substr(inst.collectedfrom.key, 4) as collectedfrom,
                substr(inst.hostedby.key, 4) as hostedby, inst.dateofacceptance.value as dateofacceptance, inst.license.value as license, p.qualifier.classname as pidtype, p.value as pid
-         from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view outer explode(inst.pid) pids as p) r
+         from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view explode(inst.pid) pids as p) r
 join ${stats_db_name}.result res on res.id=r.id;

 DROP TABLE IF EXISTS ${stats_db_name}.result_apc purge;

 create table if not exists ${stats_db_name}.result_apc STORED AS PARQUET as
-select distinct r.id, r.amount, r.currency
+select r.id, r.amount, r.currency
 from (
         select substr(r.id, 4) as id, cast(inst.processingchargeamount.value as float) as amount, inst.processingchargecurrency.value as currency
         from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
@ -180,12 +180,4 @@ create view TARGET.indi_funder_openess as select * from SOURCE.indi_funder_opene
 create view TARGET.indi_funder_findable as select * from SOURCE.indi_funder_findable;
 create view TARGET.indi_ris_fairness as select * from SOURCE.indi_ris_fairness;
 create view TARGET.indi_ris_openess as select * from SOURCE.indi_ris_openess;
-create view TARGET.indi_ris_findable as select * from SOURCE.indi_ris_findable;
-
-create table TARGET.indi_pub_green_with_license stored as parquet as select * from SOURCE.indi_pub_green_with_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-create table TARGET.result_country stored as parquet as select * from SOURCE.result_country orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_result_oa_with_license stored as parquet as select * from SOURCE.indi_result_oa_with_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-create table TARGET.indi_result_oa_without_license stored as parquet as select * from SOURCE.indi_result_oa_without_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_result_under_transformative stored as parquet as select * from SOURCE.indi_result_under_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+create view TARGET.indi_ris_findable as select * from SOURCE.indi_ris_findable;
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql
@ -80,12 +80,8 @@ create table TARGET.result stored as parquet as
             'openorgs____::0fccc7640f0cb44d5cd1b06b312a06b9',  -- Cardiff University
             'openorgs____::8839b55dae0c84d56fd533f52d5d483a',   -- Leibniz Institute of Ecological Urban and Regional Development
             'openorgs____::526468206bca24c1c90da6a312295cf4',	-- Cyprus University of Technology
-             'openorgs____::b5ca9d4340e26454e367e2908ef3872f',	-- Alma Mater Studiorum University of Bologna
-             'openorgs____::a6340e6ecf60f6bba163659df985b0f2',  -- TU Dresden
-             'openorgs____::64badd35233ba2cd4946368ef2f4cf57',  --	University of Vienna
-             'openorgs____::7501d66d2297a963ebfb075c43fff88e',  -- Royal Institute of Technology
-             'openorgs____::d5eb679abdd31f70fcd4c8ba711148bf',  -- Sorbonne University
-             'openorgs____::b316f25380d106aac402f5ae8653910d'  --	Centre for Research on Ecology and Forestry Applications
+             'openorgs____::b5ca9d4340e26454e367e2908ef3872f'	-- Alma Mater Studiorum University of Bologna
+
        ) )) foo;

 create view if not exists TARGET.category as select * from SOURCE.category;
@ -268,11 +264,4 @@ create view TARGET.indi_ris_fairness as select * from SOURCE.indi_ris_fairness;
 create view TARGET.indi_ris_openess as select * from SOURCE.indi_ris_openess;
 create view TARGET.indi_ris_findable as select * from SOURCE.indi_ris_findable;

-create table TARGET.indi_pub_green_with_license stored as parquet as select * from SOURCE.indi_pub_green_with_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-create table TARGET.result_country stored as parquet as select * from SOURCE.result_country orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_result_oa_with_license stored as parquet as select * from SOURCE.indi_result_oa_with_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-create table TARGET.indi_result_oa_without_license stored as parquet as select * from SOURCE.indi_result_oa_without_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-
-create table TARGET.indi_result_under_transformative stored as parquet as select * from SOURCE.indi_result_under_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id);

--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql
@ -60,10 +60,5 @@ create table TARGET.result stored as parquet as
             'openorgs____::0fccc7640f0cb44d5cd1b06b312a06b9',  -- Cardiff University
             'openorgs____::8839b55dae0c84d56fd533f52d5d483a',   -- Leibniz Institute of Ecological Urban and Regional Development
             'openorgs____::526468206bca24c1c90da6a312295cf4',	-- Cyprus University of Technology
-             'openorgs____::b5ca9d4340e26454e367e2908ef3872f',	-- Alma Mater Studiorum University of Bologna
-             'openorgs____::a6340e6ecf60f6bba163659df985b0f2',	-- TU Dresden
-             'openorgs____::64badd35233ba2cd4946368ef2f4cf57',  -- University of Vienna
-             'openorgs____::7501d66d2297a963ebfb075c43fff88e',  -- Royal Institute of Technology
-             'openorgs____::d5eb679abdd31f70fcd4c8ba711148bf',	-- Sorbonne University
-             'openorgs____::b316f25380d106aac402f5ae8653910d'   -- Centre for Research on Ecology and Forestry Applications
+             'openorgs____::b5ca9d4340e26454e367e2908ef3872f'	-- Alma Mater Studiorum University of Bologna
        )))  foo;
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql
@ -2,8 +2,9 @@ create table ${observatory_db_name}.result_cc_licence stored as parquet as
 select r.id, coalesce(rln.count, 0) > 0 as cc_licence
 from ${stats_db_name}.result r
         left outer join (
-    select rl.id, sum(case when rl.type like 'CC%' then 1 else 0 end) as count
+    select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
    from ${stats_db_name}.result_licenses rl
+        left outer join ${stats_db_name}.licenses_normalized rln on rl.type=rln.license
    group by rl.id
 ) rln on rln.id=r.id;

--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
@ -95,8 +95,7 @@ DROP TABLE IF EXISTS ${stats_db_name}.funder purge;
 create table ${stats_db_name}.funder STORED AS PARQUET as
 select distinct xpath_string(fund, '//funder/id')        as id,
                xpath_string(fund, '//funder/name')      as name,
-                xpath_string(fund, '//funder/shortname') as shortname,
-                xpath_string(fundingtree[0].value, '//funder/jurisdiction') as country
+                xpath_string(fund, '//funder/shortname') as shortname
 from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund;

 DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge;
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
@ -64,26 +64,6 @@
            <name>hadoop_user_name</name>
            <description>user name of the wf owner</description>
        </property>
-
-        <property>
-            <name>sparkSqlWarehouseDir</name>
-        </property>
-        <!-- General oozie workflow properties -->
-        <property>
-            <name>sparkClusterOpts</name>
-            <value>--conf spark.network.timeout=600 --conf spark.extraListeners= --conf spark.sql.queryExecutionListeners= --conf spark.yarn.historyServer.address=http://iis-cdh5-test-m3.ocean.icm.edu.pl:18088 --conf spark.eventLog.dir=hdfs://nameservice1/user/spark/applicationHistory</value>
-            <description>spark cluster-wide options</description>
-        </property>
-        <property>
-            <name>sparkResourceOpts</name>
-            <value>--executor-memory=6G --conf spark.executor.memoryOverhead=4G --executor-cores=6 --driver-memory=8G --driver-cores=4</value>
-            <description>spark resource options</description>
-        </property>
-        <property>
-            <name>sparkApplicationOpts</name>
-            <value>--conf spark.sql.shuffle.partitions=3840</value>
-            <description>spark resource options</description>
-        </property>
    </parameters>

    <global>
@ -95,21 +75,17 @@
                <value>${hive_metastore_uris}</value>
            </property>
            <property>
-                <name>hive.txn.timeout</name>
-                <value>${hive_timeout}</value>
+            	<name>hive.txn.timeout</name>
+            	<value>${hive_timeout}</value>
            </property>
            <property>
                <name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name>
                <value>0.80</value>
            </property>
-            <property>
-                <name>oozie.action.sharelib.for.spark</name>
-                <value>${oozieActionShareLibForSpark2}</value>
-            </property>
-            <property>
-                <name>mapred.job.queue.name</name>
-                <value>analytics</value>
-            </property>
+	<property>
+	    <name>mapred.job.queue.name</name>
+	    <value>analytics</value>
+	</property>
        </configuration>
    </global>

@ -157,164 +133,164 @@
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step1.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step2"/>
        <error to="Kill"/>
    </action>
-
+    
    <action name="Step2">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step2.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step3"/>
        <error to="Kill"/>
    </action>
-
+        
    <action name="Step3">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step3.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step4"/>
        <error to="Kill"/>
    </action>
-
+    
    <action name="Step4">
-        <hive2 xmlns="uri:oozie:hive2-action:0.1">
+    	<hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step4.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step5"/>
        <error to="Kill"/>
    </action>
-
+    
    <action name="Step5">
-        <hive2 xmlns="uri:oozie:hive2-action:0.1">
+    	<hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step5.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step6"/>
        <error to="Kill"/>
    </action>
-
+    
    <action name="Step6">
-        <hive2 xmlns="uri:oozie:hive2-action:0.1">
+    	<hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step6.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step7"/>
        <error to="Kill"/>
    </action>
-
+    
    <action name="Step7">
-        <hive2 xmlns="uri:oozie:hive2-action:0.1">
+    	<hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step7.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step8"/>
        <error to="Kill"/>
    </action>
-
+    
    <action name="Step8">
-        <hive2 xmlns="uri:oozie:hive2-action:0.1">
+    	<hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step8.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step9"/>
        <error to="Kill"/>
    </action>
-
+    
    <action name="Step9">
-        <hive2 xmlns="uri:oozie:hive2-action:0.1">
+    	<hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step9.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step10"/>
        <error to="Kill"/>
    </action>
-
+    
    <action name="Step10">
-        <hive2 xmlns="uri:oozie:hive2-action:0.1">
+    	<hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step10.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
-            <param>external_stats_db_name=${external_stats_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
+			<param>external_stats_db_name=${external_stats_db_name}</param>
        </hive2>
        <ok to="Step11"/>
        <error to="Kill"/>
-    </action>
+    </action>    

    <action name="Step11">
-        <hive2 xmlns="uri:oozie:hive2-action:0.1">
+    	<hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step11.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
-            <param>external_stats_db_name=${external_stats_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
+			<param>external_stats_db_name=${external_stats_db_name}</param>
        </hive2>
        <ok to="Step12"/>
        <error to="Kill"/>
-    </action>
-
+    </action>  
+    
    <action name="Step12">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step12.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step13"/>
        <error to="Kill"/>
    </action>
-
+    
    <action name="Step13">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step13.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step14"/>
        <error to="Kill"/>
    </action>
-
+    
    <action name="Step14">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step14.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step15"/>
        <error to="Kill"/>
    </action>
-
+    
    <action name="Step15">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step15.sql</script>
-            <param>stats_db_name=${stats_db_name}</param>
-            <param>openaire_db_name=${openaire_db_name}</param>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step15_5"/>
        <error to="Kill"/>
@ -345,35 +321,13 @@
        <error to="Kill"/>
    </action>

-<!--    <action name="Step16-createIndicatorsTables">-->
-<!--        <hive2 xmlns="uri:oozie:hive2-action:0.1">-->
-<!--            <jdbc-url>${hive_jdbc_url}</jdbc-url>-->
-<!--            <script>scripts/step16-createIndicatorsTables.sql</script>-->
-<!--            <param>stats_db_name=${stats_db_name}</param>-->
-<!--            <param>external_stats_db_name=${external_stats_db_name}</param>-->
-<!--        </hive2>-->
-<!--        <ok to="Step16_1-definitions"/>-->
-<!--        <error to="Kill"/>-->
-<!--    </action>-->
-
    <action name="Step16-createIndicatorsTables">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Step16-createIndicatorsTables</name>
-            <class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
-            <jar>dhp-stats-update-${projectVersion}.jar</jar>
-            <spark-opts>
-                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-                ${sparkClusterOpts}
-                ${sparkResourceOpts}
-                ${sparkApplicationOpts}
-            </spark-opts>
-            <arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
-            <arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql</arg>
-            <arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
-            <arg>--external_stats_db_name</arg><arg>${external_stats_db_name}</arg>
-        </spark>
+        <hive2 xmlns="uri:oozie:hive2-action:0.1">
+            <jdbc-url>${hive_jdbc_url}</jdbc-url>
+            <script>scripts/step16-createIndicatorsTables.sql</script>
+            <param>stats_db_name=${stats_db_name}</param>
+            <param>external_stats_db_name=${external_stats_db_name}</param>
+        </hive2>
        <ok to="Step16_1-definitions"/>
        <error to="Kill"/>
    </action>
@ -433,18 +387,18 @@
        <error to="Kill"/>
    </action>

-    <!--    <action name="step20-createMonitorDB-post">-->
-    <!--        <shell xmlns="uri:oozie:shell-action:0.1">-->
-    <!--            <job-tracker>${jobTracker}</job-tracker>-->
-    <!--            <name-node>${nameNode}</name-node>-->
-    <!--            <exec>monitor-post.sh</exec>-->
-    <!--            <argument>${monitor_db_name}</argument>-->
-    <!--            <argument>${monitor_db_shadow_name}</argument>-->
-    <!--            <file>monitor-post.sh</file>-->
-    <!--        </shell>-->
-    <!--        <ok to="step21-createObservatoryDB-pre"/>-->
-    <!--        <error to="Kill"/>-->
-    <!--    </action>-->
+<!--    <action name="step20-createMonitorDB-post">-->
+<!--        <shell xmlns="uri:oozie:shell-action:0.1">-->
+<!--            <job-tracker>${jobTracker}</job-tracker>-->
+<!--            <name-node>${nameNode}</name-node>-->
+<!--            <exec>monitor-post.sh</exec>-->
+<!--            <argument>${monitor_db_name}</argument>-->
+<!--            <argument>${monitor_db_shadow_name}</argument>-->
+<!--            <file>monitor-post.sh</file>-->
+<!--        </shell>-->
+<!--        <ok to="step21-createObservatoryDB-pre"/>-->
+<!--        <error to="Kill"/>-->
+<!--    </action>-->

    <action name="step21-createObservatoryDB-pre">
        <shell xmlns="uri:oozie:shell-action:0.1">
@ -489,8 +443,8 @@
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <exec>copyDataToImpalaCluster.sh</exec>
-            <!--            <env-var>HADOOP_USER_NAME=${wf:user()}</env-var>-->
-            <!--            <argument>${external_stats_db_name}</argument>-->
+<!--            <env-var>HADOOP_USER_NAME=${wf:user()}</env-var>-->
+<!--            <argument>${external_stats_db_name}</argument>-->
            <argument>${stats_db_name}</argument>
            <argument>${monitor_db_name}</argument>
            <argument>${observatory_db_name}</argument>
@ -551,4 +505,4 @@
    </action>

    <end name="End"/>
-</workflow-app>
+</workflow-app>
--- a/dhp-workflows/pom.xml
+++ b/dhp-workflows/pom.xml
@ -31,10 +31,6 @@
        <module>dhp-enrichment</module>
        <module>dhp-graph-provision</module>
        <module>dhp-blacklist</module>
-        <module>dhp-stats-actionsets</module>
-        <module>dhp-stats-hist-snaps</module>
-        <module>dhp-stats-monitor-irish</module>
-        <module>dhp-stats-monitor-update</module>
        <module>dhp-stats-update</module>
        <module>dhp-stats-promote</module>
        <module>dhp-usage-stats-build</module>
Author	SHA1	Message	Date
Michele Artini	6e380993d6	added filter for typenorm field	2024-03-12 10:09:38 +01:00
Michele Artini	f9416ab2aa	minor	2024-03-12 09:20:37 +01:00
Michele Artini	3e56b88a5f	base transformation rule test	2024-03-11 14:48:37 +01:00
Michele Artini	dfb05ebedb	unitTest	2024-03-11 11:44:04 +01:00
Michele Artini	341c3f798e	xslt	2024-03-11 10:50:17 +01:00
Michele Artini	4145065481	mapping for ODF	2024-03-08 15:31:08 +01:00
Michele Artini	5e9102d404	xslt	2024-03-08 14:41:25 +01:00
Michele Artini	dc9d642e66	xslt	2024-03-08 12:37:54 +01:00
Michele Artini	932173287a	added a base vocabulary	2024-03-08 11:52:58 +01:00
Michele Artini	3b5163d8e3	fixed a xpath	2024-03-08 08:23:07 +01:00
Michele Artini	6648d710a3	xslt	2024-03-07 13:06:41 +01:00
Michele Artini	a059747f16	xslt	2024-03-07 10:57:41 +01:00
Michele Artini	1e34585213	file renaming	2024-03-07 09:52:49 +01:00
Michele Artini	108478b778	xslt	2024-03-07 09:34:16 +01:00
Michele Artini	6500151c90	initial version of the BASE Transformation rule	2024-03-05 14:18:57 +01:00
Michele Artini	af58cd726e	initial version of the BASE Transformation rule	2024-03-05 11:48:36 +01:00
Michele Artini	efbb6c37d6	updated sql for base	2024-03-05 09:37:04 +01:00
Michele Artini	b206e9a30b	sql to create the BASE datasource	2024-03-04 15:13:34 +01:00
Michele Artini	db6f774394	selection of the new plugin	2024-03-04 10:43:40 +01:00
Michele Artini	9506d80ddc	changed sql to select accepted datasources	2024-03-04 08:25:40 +01:00
Michele Artini	c2b6841eb0	opendoar datasource filter	2024-03-01 15:32:56 +01:00
Michele Artini	be7f327e88	opendoar datasource filter	2024-03-01 13:38:36 +01:00
Michele Artini	32f4d6f691	reports for types	2024-03-01 11:43:37 +01:00
Michele Artini	71204a8056	some fields in stats	2024-02-29 10:17:31 +01:00
Michele Artini	5ddbef3a5b	new stats	2024-02-28 14:34:09 +01:00
Michele Artini	04dd31139b	separator in sql query	2024-02-28 11:12:25 +01:00
Michele Artini	3d14bef381	OpenDoar reports	2024-02-28 10:51:13 +01:00
Michele Artini	f8cf7ffbcb	stats	2024-02-22 14:01:11 +01:00
Michele Artini	d2b7541583	fixed a problem with Dataset model	2024-02-16 11:36:46 +01:00
Michele Artini	8ffdd9747d	added id to BaseRecordInfo	2024-02-15 14:27:50 +01:00
Michele Artini	da65728afe	produce a parquet file	2024-02-15 14:04:17 +01:00
Michele Artini	e254720377	fixed path reports	2024-02-15 08:52:28 +01:00
Michele Artini	8d85c1e97e	used a parser STAX	2024-02-15 08:21:52 +01:00
Michele Artini	b42e2b4d61	fixed log class	2024-02-14 15:52:31 +01:00
Michele Artini	773346f638	increased memory	2024-02-14 14:40:27 +01:00
Michele Artini	2e11197142	removed invalid deletion	2024-02-14 11:59:30 +01:00
Michele Artini	ddd6a7ceb3	minor fixes	2024-02-14 11:39:37 +01:00
Michele Artini	963a2500be	new reports in hadoop job	2024-02-14 10:37:39 +01:00
Michele Artini	4b1ecad4e2	prepared a job to analyze the BASE records	2024-02-13 13:48:26 +01:00
Michele Artini	dd7350ecf2	fixed a problem with xpaths	2024-02-13 08:36:42 +01:00
Michele Artini	265bfd364d	refactoing	2024-02-12 15:35:36 +01:00
Michele Artini	16766c514e	refactoring	2024-02-12 12:19:57 +01:00
Michele Artini	5add433b74	partial refactoring	2024-02-09 14:33:04 +01:00
Michele Artini	c974c75f83	partial refactoring	2024-02-09 12:36:20 +01:00
Michele Artini	c6db6335b9	prepare filtering for base import	2024-02-06 15:10:29 +01:00
Michele Artini	abcd81bba0	first implementation of the collection plugin for BASE	2024-02-05 15:19:41 +01:00