+
    <name>workingDirPath</name>
    <description>the source path</description>
+    <property>
+        <name>index</name>
+        <description>the index name</description>
+    </property>
+    <property>
+        <name>esCluster</name>
+        <description>the Index cluster</description>
+    </property>
    <name>sparkDriverMemory</name>
    <description>memory for driver process</description>
@@ -12,39 +20,43 @@
    <name>sparkExecutorMemory</name>
    <description>memory for individual executor</description>
-    <property>
-        <name>index</name>
-        <description>index name</description>
-    </property>
-
-    <property>
-        <name>indexHost</name>
-        <description>index host name</description>
-    </property>
-
+
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+
+
+
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <main-class>eu.dnetlib.dhp.provision.DropAndCreateESIndex</main-class>
+        <arg>-i</arg><arg>${index}</arg>
+        <arg>-c</arg><arg>${esCluster}</arg>
+
+
+
+
+
+
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn-cluster</master>
            <mode>cluster</mode>
-            <name>index Summary</name>
+            <name>index summary</name>
            <class>eu.dnetlib.dhp.provision.SparkIndexCollectionOnES</class>
            <jar>dhp-graph-provision-scholexplorer-${projectVersion}.jar</jar>
-            <spark-opts>--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} --conf spark.dynamicAllocation.maxExecutors="32"</spark-opts>
+            <spark-opts>--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} --conf spark.dynamicAllocation.maxExecutors="8"</spark-opts>
            <arg>-mt</arg><arg>yarn-cluster</arg>
-            <arg>--sourcePath</arg><arg>${workingDirPath}/summary</arg>
+            <arg>--sourcePath</arg><arg>${workingDirPath}/summary_json</arg>
            <arg>--index</arg><arg>${index}_object</arg>
-            <arg>--esHost</arg><arg>${indexHost}</arg>
            <arg>--idPath</arg><arg>id</arg>
-            <arg>--type</arg><arg>summary</arg>
+            <arg>--cluster</arg><arg>${esCluster}</arg>
@@ -63,9 +75,8 @@
            <arg>-mt</arg><arg>yarn-cluster</arg>
            <arg>--sourcePath</arg><arg>${workingDirPath}/scholix_json</arg>
            <arg>--index</arg><arg>${index}_scholix</arg>
-            <arg>--esHost</arg><arg>${indexHost}</arg>
            <arg>--idPath</arg><arg>identifier</arg>
-            <arg>--type</arg><arg>scholix</arg>
+            <arg>--cluster</arg><arg>${esCluster}</arg>
diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/provision/oozie_app/workflow.xml
index c2c2a78fb..4c0d6c1da 100644
--- a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/provision/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/provision/oozie_app/workflow.xml
@@ -112,59 +112,5 @@
-
-
-
-        <job-tracker>${jobTracker}</job-tracker>
-        <name-node>${nameNode}</name-node>
-        <main-class>eu.dnetlib.dhp.provision.DropAndCreateESIndex</main-class>
-        <arg>-i</arg><arg>${index}</arg>
-        <arg>-c</arg><arg>${esCluster}</arg>
-
-
-
-
-
-
-
-
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
-            <master>yarn-cluster</master>
-            <mode>cluster</mode>
-            <name>index summary</name>
-            <class>eu.dnetlib.dhp.provision.SparkIndexCollectionOnES</class>
-            <jar>dhp-graph-provision-scholexplorer-${projectVersion}.jar</jar>
-            <spark-opts>--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} --conf spark.dynamicAllocation.maxExecutors="8"</spark-opts>
-            <arg>-mt</arg><arg>yarn-cluster</arg>
-            <arg>--sourcePath</arg><arg>${workingDirPath}/summary_json</arg>
-            <arg>--index</arg><arg>${index}_object</arg>
-            <arg>--idPath</arg><arg>id</arg>
-            <arg>--cluster</arg><arg>${esCluster}</arg>
-
-
-
-
-
-
-
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
-            <master>yarn-cluster</master>
-            <mode>cluster</mode>
-            <name>index scholix</name>
-            <class>eu.dnetlib.dhp.provision.SparkIndexCollectionOnES</class>
-            <jar>dhp-graph-provision-scholexplorer-${projectVersion}.jar</jar>
-            <spark-opts>--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} --conf spark.dynamicAllocation.maxExecutors="8"</spark-opts>
-            <arg>-mt</arg><arg>yarn-cluster</arg>
-            <arg>--sourcePath</arg><arg>${workingDirPath}/scholix_json</arg>
-            <arg>--index</arg><arg>${index}_scholix</arg>
-            <arg>--idPath</arg><arg>identifier</arg>
-            <arg>--cluster</arg><arg>${esCluster}</arg>
-
-
-
-
-
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-provision/README.md b/dhp-workflows/dhp-graph-provision/README.md
new file mode 100644
index 000000000..973a5909d
--- /dev/null
+++ b/dhp-workflows/dhp-graph-provision/README.md
@@ -0,0 +1,21 @@
+Joins the graph nodes by resolving the links of distance = 1 to create an adjacency list of linked objects. The
+operation considers all the entity types (publication, dataset, software, ORP, project, datasource, organization) and
+all the possible relationships (similarity links produced by the Dedup process are excluded).
+
+The operation is implemented by sequentially joining one entity type at a time (E) with the relationships (R), and
+again with E, finally grouping by E.id.
+
+The workflow is organized in different parts aimed to reduce the complexity of the operation:
+
+1) PrepareRelationsJob: considers only relationships that are not virtually deleted ($.dataInfo.deletedbyinference ==
+false); each entity can be linked to at most 100 other objects
+
+2) CreateRelatedEntitiesJob: (phase 1) prepare tuples [relation - target entity] (R - T): for each entity type
+E_i, map E_i as RelatedEntity T_i to simplify the model and extract only the necessary information, join (R.target =
+T_i.id), save the tuples (R_i, T_i); (phase 2) create the union of all the entity types E, hash by id, read the tuples
+(R, T), hash by R.source, join E.id = (R, T).source, where E becomes the Source Entity S, save the tuples (S, R, T)
+
+3) AdjacencyListBuilderJob: given the tuples (S - R - T), group by S.id -> List [ R - T ], mapping the
+result as JoinedEntity
+
+4) XmlConverterJob: converts the JoinedEntities into XML records
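The join strategy described in the README can be illustrated with a small, self-contained Spark sketch. This is not the project's code: the class and column names (AdjacencyListSketch, Entity, Rel, tid, tname) are illustrative assumptions; the real jobs operate on the OpenAIRE model classes with Kryo encoders.

// Minimal sketch of the adjacency-list join: (R join T on R.target = T.id), then
// (E join (R,T) on E.id = R.source), finally grouped by the source id.
import java.util.Arrays;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import static org.apache.spark.sql.functions.*;

public class AdjacencyListSketch {

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().master("local[*]").appName("adjacency-sketch").getOrCreate();

        // entities E: (id, name) -- stand-ins for publications, datasets, projects, ...
        Dataset<Row> entities = spark.createDataFrame(Arrays.asList(
            new Entity("e1", "publication A"),
            new Entity("e2", "dataset B"),
            new Entity("e3", "project C")), Entity.class);

        // relations R: (source, target), already pruned of deleted-by-inference links
        Dataset<Row> relations = spark.createDataFrame(Arrays.asList(
            new Rel("e1", "e2"),
            new Rel("e1", "e3")), Rel.class);

        // phase 1: (R - T) tuples, joining on R.target = T.id
        Dataset<Row> relTarget = relations
            .join(entities.withColumnRenamed("id", "tid").withColumnRenamed("name", "tname"),
                col("target").equalTo(col("tid")));

        // phase 2 + grouping: attach the source entity S and collect the related entities per S.id
        Dataset<Row> adjacency = entities
            .join(relTarget, entities.col("id").equalTo(relTarget.col("source")), "left")
            .groupBy(entities.col("id"))
            .agg(collect_list(col("tname")).as("links"));

        adjacency.show(false);
        spark.stop();
    }

    // simple beans used only to build the example data
    public static class Entity implements java.io.Serializable {
        private String id;
        private String name;
        public Entity() {}
        public Entity(String id, String name) { this.id = id; this.name = name; }
        public String getId() { return id; }
        public void setId(String id) { this.id = id; }
        public String getName() { return name; }
        public void setName(String name) { this.name = name; }
    }

    public static class Rel implements java.io.Serializable {
        private String source;
        private String target;
        public Rel() {}
        public Rel(String source, String target) { this.source = source; this.target = target; }
        public String getSource() { return source; }
        public void setSource(String source) { this.source = source; }
        public String getTarget() { return target; }
        public void setTarget(String target) { this.target = target; }
    }
}

Run locally, this prints one row per source entity with the list of linked entity names, mirroring the grouped-by-E.id adjacency list that the workflow materializes.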
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/AdjacencyListBuilderJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/AdjacencyListBuilderJob.java
deleted file mode 100644
index d9cc03cd5..000000000
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/AdjacencyListBuilderJob.java
+++ /dev/null
@@ -1,109 +0,0 @@
-
-package eu.dnetlib.dhp.oa.provision;
-
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Optional;
-import java.util.stream.Collectors;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.api.java.function.MapGroupsFunction;
-import org.apache.spark.sql.*;
-import org.apache.spark.sql.expressions.Aggregator;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.common.HdfsSupport;
-import eu.dnetlib.dhp.oa.provision.model.*;
-import scala.Tuple2;
-import scala.collection.JavaConverters;
-import scala.collection.Seq;
-
-/**
- * Joins the graph nodes by resolving the links of distance = 1 to create an adjacency list of linked objects. The
- * operation considers all the entity types (publication, dataset, software, ORP, project, datasource, organization, and
- * all the possible relationships (similarity links produced by the Dedup process are excluded).
- *
- * The operation is implemented by sequentially joining one entity type at time (E) with the relationships (R), and
- * again by E, finally grouped by E.id;
- *
- * The workflow is organized in different parts aimed to to reduce the complexity of the operation 1)
- * PrepareRelationsJob: only consider relationships that are not virtually deleted ($.dataInfo.deletedbyinference ==
- * false), each entity can be linked at most to 100 other objects
- *
- * 2) JoinRelationEntityByTargetJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type
- * E_i map E_i as RelatedEntity T_i to simplify the model and extracting only the necessary information join (R.target =
- * T_i.id) save the tuples (R_i, T_i) (phase 2): create the union of all the entity types E, hash by id read the tuples
- * (R, T), hash by R.source join E.id = (R, T).source, where E becomes the Source Entity S save the tuples (S, R, T)
- *
- * 3) AdjacencyListBuilderJob: given the tuple (S - R - T) we need to group by S.id -> List [ R - T ], mapping the
- * result as JoinedEntity
- *
- * 4) XmlConverterJob: convert the JoinedEntities as XML records
- */
-public class AdjacencyListBuilderJob {
-
- private static final Logger log = LoggerFactory.getLogger(AdjacencyListBuilderJob.class);
-
- public static final int MAX_LINKS = 100;
-
- public static void main(String[] args) throws Exception {
-
- final ArgumentApplicationParser parser = new ArgumentApplicationParser(
- IOUtils
- .toString(
- AdjacencyListBuilderJob.class
- .getResourceAsStream(
- "/eu/dnetlib/dhp/oa/provision/input_params_build_adjacency_lists.json")));
- parser.parseArgument(args);
-
- Boolean isSparkSessionManaged = Optional
- .ofNullable(parser.get("isSparkSessionManaged"))
- .map(Boolean::valueOf)
- .orElse(Boolean.TRUE);
- log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
-
- String inputPath = parser.get("inputPath");
- log.info("inputPath: {}", inputPath);
-
- String outputPath = parser.get("outputPath");
- log.info("outputPath: {}", outputPath);
-
- SparkConf conf = new SparkConf();
- conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
- conf.registerKryoClasses(ProvisionModelSupport.getModelClasses());
-
- runWithSparkSession(
- conf,
- isSparkSessionManaged,
- spark -> {
- removeOutputDir(spark, outputPath);
- createAdjacencyListsKryo(spark, inputPath, outputPath);
- });
- }
-
- private static void createAdjacencyListsKryo(
- SparkSession spark, String inputPath, String outputPath) {
-
- log.info("Reading joined entities from: {}", inputPath);
-
- final List<String> paths = HdfsSupport
- .listFiles(inputPath, spark.sparkContext().hadoopConfiguration());
-
- log.info("Found paths: {}", String.join(",", paths));
-
- }
-
- private static Seq<String> toSeq(List<String> list) {
- return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
- }
-
- private static void removeOutputDir(SparkSession spark, String path) {
- HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
- }
-}
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java
index b08e593f7..dd251ec04 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java
@@ -31,26 +31,9 @@ import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2;
/**
- * Joins the graph nodes by resolving the links of distance = 1 to create an adjacency list of linked objects. The
- * operation considers all the entity types (publication, dataset, software, ORP, project, datasource, organization, and
- * all the possible relationships (similarity links produced by the Dedup process are excluded).
- *
- * The operation is implemented by sequentially joining one entity type at time (E) with the relationships (R), and
- * again by E, finally grouped by E.id;
- *
- * The workflow is organized in different parts aimed to to reduce the complexity of the operation 1)
- * PrepareRelationsJob: only consider relationships that are not virtually deleted ($.dataInfo.deletedbyinference ==
- * false), each entity can be linked at most to 100 other objects
- *
- * 2) JoinRelationEntityByTargetJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type
- * E_i map E_i as RelatedEntity T_i to simplify the model and extracting only the necessary information join (R.target =
- * T_i.id) save the tuples (R_i, T_i) (phase 2): create the union of all the entity types E, hash by id read the tuples
- * (R, T), hash by R.source join E.id = (R, T).source, where E becomes the Source Entity S save the tuples (S, R, T)
- *
- * 3) AdjacencyListBuilderJob: given the tuple (S - R - T) we need to group by S.id -> List [ R - T ], mapping the
- * result as JoinedEntity
- *
- * 4) XmlConverterJob: convert the JoinedEntities as XML records
+ * CreateRelatedEntitiesJob (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type
+ * E_i, map E_i as RelatedEntity T_i to simplify the model and extract only the necessary information, join
+ * (R.target = T_i.id), save the tuples (R_i, T_i)
*/
public class CreateRelatedEntitiesJob_phase1 {
@@ -109,7 +92,6 @@ public class CreateRelatedEntitiesJob_phase1 {
String outputPath) {
Dataset<Tuple2<String, Relation>> relsByTarget = readPathRelation(spark, inputRelationsPath)
- .filter("dataInfo.deletedbyinference == false")
.map(
(MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(r.getTarget(),
r),
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java
index 7e175121e..9cdf1cd2e 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java
@@ -34,26 +34,8 @@ import scala.collection.JavaConverters;
import scala.collection.Seq;
/**
- * Joins the graph nodes by resolving the links of distance = 1 to create an adjacency list of linked objects. The
- * operation considers all the entity types (publication, dataset, software, ORP, project, datasource, organization, and
- * all the possible relationships (similarity links produced by the Dedup process are excluded).
- *
- * The operation is implemented by sequentially joining one entity type at time (E) with the relationships (R), and
- * again by E, finally grouped by E.id;
- *
- * The workflow is organized in different parts aimed to to reduce the complexity of the operation 1)
- * PrepareRelationsJob: only consider relationships that are not virtually deleted ($.dataInfo.deletedbyinference ==
- * false), each entity can be linked at most to 100 other objects
- *
- * 2) JoinRelationEntityByTargetJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type
- * E_i map E_i as RelatedEntity T_i to simplify the model and extracting only the necessary information join (R.target =
- * T_i.id) save the tuples (R_i, T_i) (phase 2): create the union of all the entity types E, hash by id read the tuples
+ * CreateRelatedEntitiesJob (phase 2): create the union of all the entity types E, hash by id, read the tuples
* (R, T), hash by R.source join E.id = (R, T).source, where E becomes the Source Entity S save the tuples (S, R, T)
- *
- * 3) AdjacencyListBuilderJob: given the tuple (S - R - T) we need to group by S.id -> List [ R - T ], mapping the
- * result as JoinedEntity
- *
- * 4) XmlConverterJob: convert the JoinedEntities as XML records
*/
public class CreateRelatedEntitiesJob_phase2 {
@@ -123,7 +105,7 @@ public class CreateRelatedEntitiesJob_phase2 {
TypedColumn<JoinedEntity, JoinedEntity> aggregator = new AdjacencyListAggregator().toColumn();
entities
- .joinWith(relatedEntities, entities.col("_1").equalTo(relatedEntities.col("_1")), "left_outer")
+ .joinWith(relatedEntities, entities.col("_1").equalTo(relatedEntities.col("_1")), "left")
.map((MapFunction, Tuple2>, JoinedEntity>) value -> {
JoinedEntity je = new JoinedEntity(value._1()._2());
Optional
@@ -132,7 +114,6 @@ public class CreateRelatedEntitiesJob_phase2 {
.ifPresent(r -> je.getLinks().add(r));
return je;
}, Encoders.kryo(JoinedEntity.class))
- .filter(filterEmptyEntityFn())
.groupByKey(
(MapFunction<JoinedEntity, String>) value -> value.getEntity().getId(),
Encoders.STRING())
@@ -140,7 +121,6 @@ public class CreateRelatedEntitiesJob_phase2 {
.map(
(MapFunction<Tuple2<String, JoinedEntity>, JoinedEntity>) value -> value._2(),
Encoders.kryo(JoinedEntity.class))
- .filter(filterEmptyEntityFn())
.write()
.mode(SaveMode.Overwrite)
.parquet(outputPath);
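The joinWith change above replaces the join-type string "left_outer" with "left". In Spark these strings (together with "leftouter") are aliases for the same left outer join, so the rename is behavior-neutral. A minimal check, assuming a local Spark session and not project code:

// Both joins keep every left row; the two counts are equal.
import java.util.Arrays;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

import scala.Tuple2;

public class LeftJoinAliasCheck {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().master("local[*]").appName("left-join-alias").getOrCreate();

        Dataset<Integer> a = spark.createDataset(Arrays.asList(1, 2, 3), Encoders.INT());
        Dataset<Integer> b = spark.createDataset(Arrays.asList(2, 3, 4), Encoders.INT());

        Dataset<Tuple2<Integer, Integer>> left = a.joinWith(b, a.col("value").equalTo(b.col("value")), "left");
        Dataset<Tuple2<Integer, Integer>> leftOuter = a.joinWith(b, a.col("value").equalTo(b.col("value")), "left_outer");

        // both counts are 3: every left row is kept regardless of the alias used
        System.out.println(left.count() + " == " + leftOuter.count());
        spark.stop();
    }
}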
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java
index da0a81021..c87f0cd94 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java
@@ -3,8 +3,10 @@ package eu.dnetlib.dhp.oa.provision;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-import java.util.*;
-import java.util.function.Supplier;
+import java.util.HashSet;
+import java.util.Optional;
+import java.util.PriorityQueue;
+import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
@@ -15,8 +17,10 @@ import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.rdd.RDD;
-import org.apache.spark.sql.*;
+import org.apache.spark.sql.Encoder;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.expressions.Aggregator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -24,7 +28,6 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
-import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
@@ -36,26 +39,8 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
import scala.Tuple2;
/**
- * Joins the graph nodes by resolving the links of distance = 1 to create an adjacency list of linked objects. The
- * operation considers all the entity types (publication, dataset, software, ORP, project, datasource, organization, and
- * all the possible relationships (similarity links produced by the Dedup process are excluded).
- *
- * The operation is implemented by sequentially joining one entity type at time (E) with the relationships (R), and
- * again by E, finally grouped by E.id;
- *
- * The workflow is organized in different parts aimed to to reduce the complexity of the operation 1)
- * PrepareRelationsJob: only consider relationships that are not virtually deleted ($.dataInfo.deletedbyinference ==
- * false), each entity can be linked at most to 100 other objects
- *
- * 2) JoinRelationEntityByTargetJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type
- * E_i map E_i as RelatedEntity T_i to simplify the model and extracting only the necessary information join (R.target =
- * T_i.id) save the tuples (R_i, T_i) (phase 2): create the union of all the entity types E, hash by id read the tuples
- * (R, T), hash by R.source join E.id = (R, T).source, where E becomes the Source Entity S save the tuples (S, R, T)
- *
- * 3) AdjacencyListBuilderJob: given the tuple (S - R - T) we need to group by S.id -> List [ R - T ], mapping the
- * result as JoinedEntity
- *
- * 4) XmlConverterJob: convert the JoinedEntities as XML records
+ * PrepareRelationsJob prunes the relationships: it considers only relationships that are not virtually deleted
+ * ($.dataInfo.deletedbyinference == false), and each entity can be linked to at most 100 other objects
*/
public class PrepareRelationsJob {
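A minimal sketch of the pruning described by the new Javadoc, assuming a simplified Rel bean and a plain "keep the first MAX_LINKS relations per source" cut; it is not the project's PrepareRelationsJob, whose relation model and selection criteria may differ.

// Drop virtually deleted relations, then cap the outgoing links per source entity.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.spark.api.java.function.FlatMapGroupsFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

public class PruneRelationsSketch {

    static final int MAX_LINKS = 100;

    public static class Rel implements java.io.Serializable {
        private String source;
        private String target;
        private boolean deletedbyinference;
        public Rel() {}
        public Rel(String source, String target, boolean deleted) {
            this.source = source; this.target = target; this.deletedbyinference = deleted;
        }
        public String getSource() { return source; }
        public void setSource(String s) { this.source = s; }
        public String getTarget() { return target; }
        public void setTarget(String t) { this.target = t; }
        public boolean isDeletedbyinference() { return deletedbyinference; }
        public void setDeletedbyinference(boolean d) { this.deletedbyinference = d; }
    }

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().master("local[*]").appName("prune-relations").getOrCreate();

        Dataset<Rel> rels = spark.createDataset(
            Arrays.asList(new Rel("s1", "t1", false), new Rel("s1", "t2", true), new Rel("s2", "t3", false)),
            Encoders.bean(Rel.class));

        Dataset<Rel> pruned = rels
            // keep only relations that are not virtually deleted
            .filter("deletedbyinference = false")
            // cap the number of outgoing links per source entity
            .groupByKey((MapFunction<Rel, String>) Rel::getSource, Encoders.STRING())
            .flatMapGroups((FlatMapGroupsFunction<String, Rel, Rel>) (source, it) -> {
                List<Rel> kept = new ArrayList<>();
                while (it.hasNext() && kept.size() < MAX_LINKS) {
                    kept.add(it.next());
                }
                return kept.iterator();
            }, Encoders.bean(Rel.class));

        pruned.show(false);
        spark.stop();
    }
}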
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java
index a1ed7fd2a..b44ed7446 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java
@@ -2,12 +2,11 @@
package eu.dnetlib.dhp.oa.provision;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+import static eu.dnetlib.dhp.utils.DHPUtils.toSeq;
-import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
-import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
@@ -28,39 +27,19 @@ import com.google.common.collect.Maps;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
-import eu.dnetlib.dhp.oa.provision.model.*;
+import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
+import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
-import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2;
-import scala.collection.JavaConverters;
-import scala.collection.Seq;
/**
- * Joins the graph nodes by resolving the links of distance = 1 to create an adjacency list of linked objects. The
- * operation considers all the entity types (publication, dataset, software, ORP, project, datasource, organization, and
- * all the possible relationships (similarity links produced by the Dedup process are excluded).
- *
- * The workflow is organized in different parts aimed to to reduce the complexity of the operation 1)
- * PrepareRelationsJob: only consider relationships that are not virtually deleted ($.dataInfo.deletedbyinference ==
- * false), each entity can be linked at most to 100 other objects
- *
- * 2) JoinRelationEntityByTargetJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type
- * E_i map E_i as RelatedEntity T_i to simplify the model and extracting only the necessary information join (R.target =
- * T_i.id) save the tuples (R_i, T_i) (phase 2): create the union of all the entity types E, hash by id read the tuples
- * (R, T), hash by R.source join E.id = (R, T).source, where E becomes the Source Entity S save the tuples (S, R, T)
- *
- * 3) AdjacencyListBuilderJob: given the tuple (S - R - T) we need to group by S.id -> List [ R - T ], mapping the
- * result as JoinedEntity
- *
- * 4) XmlConverterJob: convert the JoinedEntities as XML records
+ * XmlConverterJob converts the JoinedEntities into XML records
*/
public class XmlConverterJob {
private static final Logger log = LoggerFactory.getLogger(XmlConverterJob.class);
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd";
public static void main(String[] args) throws Exception {
@@ -145,10 +124,6 @@ public class XmlConverterJob {
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
}
- private static Seq<String> toSeq(List<String> list) {
- return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
- }
-
private static Map prepareAccumulators(SparkContext sc) {
Map accumulators = Maps.newHashMap();
accumulators
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql
new file mode 100644
index 000000000..ced7bbc11
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql
@@ -0,0 +1,32 @@
+-------------------------------------------
+--- Extra tables, mostly used by indicators
+
+create table ${stats_db_name}.result_projectcount as
+select r.id, count(distinct p.id) as count
+from ${stats_db_name}.result r
+left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
+left outer join ${stats_db_name}.project p on p.id=rp.project
+group by r.id;
+
+create table ${stats_db_name}.result_fundercount as
+select r.id, count(distinct p.funder) as count
+from ${stats_db_name}.result r
+left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
+left outer join ${stats_db_name}.project p on p.id=rp.project
+group by r.id;
+
+create table ${stats_db_name}.project_resultcount as
+with rcount as (
+ select p.id as pid, count(distinct r.id) as `count`, r.type as type
+ from ${stats_db_name}.project p
+ left outer join ${stats_db_name}.result_projects rp on rp.project=p.id
+ left outer join ${stats_db_name}.result r on r.id=rp.id
+ group by r.type, p.id )
+select rcount.pid, sum(case when rcount.type='publication' then rcount.count else 0 end) as publications,
+ sum(case when rcount.type='dataset' then rcount.count else 0 end) as datasets,
+ sum(case when rcount.type='software' then rcount.count else 0 end) as software,
+ sum(case when rcount.type='other' then rcount.count else 0 end) as other
+from rcount
+group by rcount.pid;
+
+create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql
index e002f656e..5c102d014 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql
@@ -5,8 +5,12 @@
------------------------------------------------------
-- Dropping old views
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.category;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.concept;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.context;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.country;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.countrygdp;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.creation_date;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_citations;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_classifications;
@@ -16,6 +20,7 @@ DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_languages;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_licenses;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_oids;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_pids;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_refereed;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_sources;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_topics;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource;
@@ -23,11 +28,15 @@ DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_languages;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_oids;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_organizations;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_results;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_sources;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.funder;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.fundref;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.numbers_country;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_datasources;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_pids;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_projects;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_sources;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_citations;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_classifications;
@@ -37,12 +46,15 @@ DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_languages;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_licenses;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_oids;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_pids;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_refereed;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_sources;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_topics;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.project;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_oids;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_organizations;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_results;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_resultcount;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_results_publication;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_citations;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_classifications;
@@ -52,19 +64,28 @@ DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_languages;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_licenses;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_oids;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_pids;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_refereed;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_sources;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_topics;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_affiliated_country;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_citations;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_classifications;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_concepts;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_datasources;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_deposited_country;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_fundercount;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_gold;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_greenoa;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_languages;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_licenses;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_oids;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_organization;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_peerreviewed;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_pids;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_projectcount;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_projects;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_refereed;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_sources;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_topics;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.rndexpediture;
@@ -78,6 +99,7 @@ DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_languages;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_licenses;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_oids;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_pids;
+DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_refereed;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_sources;
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_topics;
@@ -86,8 +108,12 @@ DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_topics;
CREATE database IF NOT EXISTS ${stats_db_shadow_name};
-- Creating new views
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.category AS SELECT * FROM ${stats_db_name}.category;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.concept AS SELECT * FROM ${stats_db_name}.concept;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.context AS SELECT * FROM ${stats_db_name}.context;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.country AS SELECT * FROM ${stats_db_name}.country;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.countrygdp AS SELECT * FROM ${stats_db_name}.countrygdp;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.creation_date AS SELECT * FROM ${stats_db_name}.creation_date;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset AS SELECT * FROM ${stats_db_name}.dataset;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_citations AS SELECT * FROM ${stats_db_name}.dataset_citations;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_classifications AS SELECT * FROM ${stats_db_name}.dataset_classifications;
@@ -97,6 +123,7 @@ CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_languages AS SELECT *
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_licenses AS SELECT * FROM ${stats_db_name}.dataset_licenses;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_oids AS SELECT * FROM ${stats_db_name}.dataset_oids;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_pids AS SELECT * FROM ${stats_db_name}.dataset_pids;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_refereed AS SELECT * FROM ${stats_db_name}.dataset_refereed;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_sources AS SELECT * FROM ${stats_db_name}.dataset_sources;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_topics AS SELECT * FROM ${stats_db_name}.dataset_topics;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource AS SELECT * FROM ${stats_db_name}.datasource;
@@ -104,11 +131,15 @@ CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_languages AS SELECT
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_oids AS SELECT * FROM ${stats_db_name}.datasource_oids;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_organizations AS SELECT * FROM ${stats_db_name}.datasource_organizations;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_results AS SELECT * FROM ${stats_db_name}.datasource_results;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_sources AS SELECT * FROM ${stats_db_name}.datasource_sources;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.funder AS SELECT * FROM ${stats_db_name}.funder;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.fundref AS SELECT * FROM ${stats_db_name}.fundref;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.numbers_country AS SELECT * FROM ${stats_db_name}.numbers_country;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization AS SELECT * FROM ${stats_db_name}.organization;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_datasources AS SELECT * FROM ${stats_db_name}.organization_datasources;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_pids AS SELECT * FROM ${stats_db_name}.organization_pids;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_projects AS SELECT * FROM ${stats_db_name}.organization_projects;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_sources AS SELECT * FROM ${stats_db_name}.organization_sources;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct AS SELECT * FROM ${stats_db_name}.otherresearchproduct;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_citations AS SELECT * FROM ${stats_db_name}.otherresearchproduct_citations;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_classifications AS SELECT * FROM ${stats_db_name}.otherresearchproduct_classifications;
@@ -118,12 +149,15 @@ CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_languages
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_licenses AS SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_oids AS SELECT * FROM ${stats_db_name}.otherresearchproduct_oids;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_pids AS SELECT * FROM ${stats_db_name}.otherresearchproduct_pids;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_refereed AS SELECT * FROM ${stats_db_name}.otherresearchproduct_refereed;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_sources AS SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_topics AS SELECT * FROM ${stats_db_name}.otherresearchproduct_topics;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project AS SELECT * FROM ${stats_db_name}.project;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_oids AS SELECT * FROM ${stats_db_name}.project_oids;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_organizations AS SELECT * FROM ${stats_db_name}.project_organizations;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_results AS SELECT * FROM ${stats_db_name}.project_results;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_resultcount AS SELECT * FROM ${stats_db_name}.project_resultcount;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_results_publication AS SELECT * FROM ${stats_db_name}.project_results_publication;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication AS SELECT * FROM ${stats_db_name}.publication;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_citations AS SELECT * FROM ${stats_db_name}.publication_citations;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_classifications AS SELECT * FROM ${stats_db_name}.publication_classifications;
@@ -133,19 +167,28 @@ CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_languages AS SELEC
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_licenses AS SELECT * FROM ${stats_db_name}.publication_licenses;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_oids AS SELECT * FROM ${stats_db_name}.publication_oids;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_pids AS SELECT * FROM ${stats_db_name}.publication_pids;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_refereed AS SELECT * FROM ${stats_db_name}.publication_refereed;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_sources AS SELECT * FROM ${stats_db_name}.publication_sources;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_topics AS SELECT * FROM ${stats_db_name}.publication_topics;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result AS SELECT * FROM ${stats_db_name}.result;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_affiliated_country AS SELECT * FROM ${stats_db_name}.result_affiliated_country;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_citations AS SELECT * FROM ${stats_db_name}.result_citations;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_classifications AS SELECT * FROM ${stats_db_name}.result_classifications;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_concepts AS SELECT * FROM ${stats_db_name}.result_concepts;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_datasources AS SELECT * FROM ${stats_db_name}.result_datasources;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_deposited_country AS SELECT * FROM ${stats_db_name}.result_deposited_country;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_fundercount AS SELECT * FROM ${stats_db_name}.result_fundercount;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_gold AS SELECT * FROM ${stats_db_name}.result_gold;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_greenoa AS SELECT * FROM ${stats_db_name}.result_greenoa;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_languages AS SELECT * FROM ${stats_db_name}.result_languages;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_licenses AS SELECT * FROM ${stats_db_name}.result_licenses;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_oids AS SELECT * FROM ${stats_db_name}.result_oids;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_organization AS SELECT * FROM ${stats_db_name}.result_organization;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_peerreviewed AS SELECT * FROM ${stats_db_name}.result_peerreviewed;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_pids AS SELECT * FROM ${stats_db_name}.result_pids;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_projectcount AS SELECT * FROM ${stats_db_name}.result_projectcount;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_projects AS SELECT * FROM ${stats_db_name}.result_projects;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_refereed AS SELECT * FROM ${stats_db_name}.result_refereed;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_sources AS SELECT * FROM ${stats_db_name}.result_sources;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_topics AS SELECT * FROM ${stats_db_name}.result_topics;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.rndexpediture AS SELECT * FROM ${stats_db_name}.rndexpediture;
@@ -159,5 +202,6 @@ CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_languages AS SELECT *
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_licenses AS SELECT * FROM ${stats_db_name}.software_licenses;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_oids AS SELECT * FROM ${stats_db_name}.software_oids;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_pids AS SELECT * FROM ${stats_db_name}.software_pids;
+CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_refereed AS SELECT * FROM ${stats_db_name}.software_refereed;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_sources AS SELECT * FROM ${stats_db_name}.software_sources;
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_topics AS SELECT * FROM ${stats_db_name}.software_topics;
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step18.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step18.sql
index 5645db309..34e48a18a 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step18.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step18.sql
@@ -5,77 +5,4 @@
------------------------------------------------------
------------------------------------------------------
-COMPUTE STATS country;
-COMPUTE STATS countrygdp;
-COMPUTE STATS dataset;
-COMPUTE STATS dataset_citations;
-COMPUTE STATS dataset_classifications;
-COMPUTE STATS dataset_concepts;
-COMPUTE STATS dataset_datasources;
-COMPUTE STATS dataset_languages;
-COMPUTE STATS dataset_oids;
-COMPUTE STATS dataset_pids;
-COMPUTE STATS dataset_sources;
-COMPUTE STATS dataset_topics;
-COMPUTE STATS datasource;
-COMPUTE STATS datasource_languages;
-COMPUTE STATS datasource_oids;
-COMPUTE STATS datasource_organizations;
-COMPUTE STATS datasource_results;
-COMPUTE STATS fundref;
-COMPUTE STATS numbers_country;
-COMPUTE STATS organization;
-COMPUTE STATS organization_datasources;
-COMPUTE STATS organization_projects;
-COMPUTE STATS otherresearchproduct;
-COMPUTE STATS otherresearchproduct_citations;
-COMPUTE STATS otherresearchproduct_classifications;
-COMPUTE STATS otherresearchproduct_concepts;
-COMPUTE STATS otherresearchproduct_datasources;
-COMPUTE STATS otherresearchproduct_languages;
-COMPUTE STATS otherresearchproduct_licenses;
-COMPUTE STATS otherresearchproduct_oids;
-COMPUTE STATS otherresearchproduct_pids;
-COMPUTE STATS otherresearchproduct_sources;
-COMPUTE STATS otherresearchproduct_topics;
-COMPUTE STATS project;
-COMPUTE STATS project_oids;
-COMPUTE STATS project_organizations;
-COMPUTE STATS project_results;
-COMPUTE STATS publication;
-COMPUTE STATS publication_citations;
-COMPUTE STATS publication_classifications;
-COMPUTE STATS publication_concepts;
-COMPUTE STATS publication_datasources;
-COMPUTE STATS publication_languages;
-COMPUTE STATS publication_licenses;
-COMPUTE STATS publication_oids;
-COMPUTE STATS publication_pids;
-COMPUTE STATS publication_sources;
-COMPUTE STATS publication_topics;
-COMPUTE STATS result;
-COMPUTE STATS result_citations;
-COMPUTE STATS result_classifications;
-COMPUTE STATS result_concepts;
-COMPUTE STATS result_datasources;
-COMPUTE STATS result_languages;
-COMPUTE STATS result_licenses;
-COMPUTE STATS result_oids;
-COMPUTE STATS result_organization;
-COMPUTE STATS result_pids;
-COMPUTE STATS result_projects;
-COMPUTE STATS result_sources;
-COMPUTE STATS result_topics;
-COMPUTE STATS rndexpediture;
-COMPUTE STATS roarmap;
-COMPUTE STATS software;
-COMPUTE STATS software_citations;
-COMPUTE STATS software_classifications;
-COMPUTE STATS software_concepts;
-COMPUTE STATS software_datasources;
-COMPUTE STATS software_languages;
-COMPUTE STATS software_licenses;
-COMPUTE STATS software_oids;
-COMPUTE STATS software_pids;
-COMPUTE STATS software_sources;
-COMPUTE STATS software_topics;
+INVALIDATE METADATA ${stats_db_name};
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step19.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step19.sql
new file mode 100644
index 000000000..34e48a18a
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step19.sql
@@ -0,0 +1,8 @@
+------------------------------------------------------
+------------------------------------------------------
+-- Impala table statistics - Needed to make the tables
+-- visible for impala
+------------------------------------------------------
+------------------------------------------------------
+
+INVALIDATE METADATA ${stats_db_name};
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql
index 312a8b82e..ba0db25be 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql
@@ -17,19 +17,28 @@ case when size(p.description) > 0 then true else false end as abstract,
from ${openaire_db_name}.publication p
where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.publication_classifications AS SELECT substr(p.id, 4) as id, instancetype.classname as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.instancetype) instances as instancetype;
+CREATE TABLE ${stats_db_name}.publication_classifications AS SELECT substr(p.id, 4) as id, instancetype.classname as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.instancetype) instances as instancetype where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.publication_concepts AS SELECT substr(p.id, 4) as id, contexts.context.id as concept from ${openaire_db_name}.publication p LATERAL VIEW explode(p.context) contexts as context;
+CREATE TABLE ${stats_db_name}.publication_concepts AS SELECT substr(p.id, 4) as id, contexts.context.id as concept from ${openaire_db_name}.publication p LATERAL VIEW explode(p.context) contexts as context where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.publication_datasources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource FROM (SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance) p LEFT OUTER JOIN (SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
+CREATE TABLE ${stats_db_name}.publication_datasources as
+SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
+ FROM (
+ SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource
+ from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance
+ where p.datainfo.deletedbyinference=false ) p
+ LEFT OUTER JOIN (
+ SELECT substr(d.id, 4) id
+ from ${openaire_db_name}.datasource d
+ WHERE d.datainfo.deletedbyinference=false ) d on p.datasource = d.id;
-CREATE TABLE ${stats_db_name}.publication_languages AS select substr(p.id, 4) as id, p.language.classname as language FROM ${openaire_db_name}.publication p;
+CREATE TABLE ${stats_db_name}.publication_languages AS select substr(p.id, 4) as id, p.language.classname as language FROM ${openaire_db_name}.publication p where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.publication_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.originalid) oids AS ids;
+CREATE TABLE ${stats_db_name}.publication_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.publication_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.pid) pids AS ppid;
+CREATE TABLE ${stats_db_name}.publication_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.publication_topics as select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.subject) subjects AS subject;
+CREATE TABLE ${stats_db_name}.publication_topics as select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false;
-- Publication_citations
-CREATE TABLE ${stats_db_name}.publication_citations AS SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result FROM ${openaire_db_name}.publication p lateral view explode(p.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="";
\ No newline at end of file
+CREATE TABLE ${stats_db_name}.publication_citations AS SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result FROM ${openaire_db_name}.publication p lateral view explode(p.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and p.datainfo.deletedbyinference=false;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql
index 47a102525..f69715a31 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql
@@ -17,20 +17,20 @@ FROM ${openaire_db_name}.dataset d
WHERE d.datainfo.deletedbyinference=FALSE;
-- Dataset_citations
-CREATE TABLE ${stats_db_name}.dataset_citations AS SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result FROM ${openaire_db_name}.dataset d LATERAL VIEW explode(d.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="";
+CREATE TABLE ${stats_db_name}.dataset_citations AS SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result FROM ${openaire_db_name}.dataset d LATERAL VIEW explode(d.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and d.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.dataset_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype;
+CREATE TABLE ${stats_db_name}.dataset_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.dataset_concepts AS SELECT substr(p.id, 4) as id, contexts.context.id as concept from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.context) contexts as context;
+CREATE TABLE ${stats_db_name}.dataset_concepts AS SELECT substr(p.id, 4) as id, contexts.context.id as concept from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.context) contexts as context where p.datainfo.deletedbyinference=false;
CREATE TABLE ${stats_db_name}.dataset_datasources AS SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource FROM (SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource
-FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance) instances AS instance) p LEFT OUTER JOIN
+FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance) instances AS instance where p.datainfo.deletedbyinference=false) p LEFT OUTER JOIN
(SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false) d ON p.datasource = d.id;
-CREATE TABLE ${stats_db_name}.dataset_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.dataset p;
+CREATE TABLE ${stats_db_name}.dataset_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.dataset p where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.dataset_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.originalid) oids AS ids;
+CREATE TABLE ${stats_db_name}.dataset_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.dataset_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.pid) pids AS ppid;
+CREATE TABLE ${stats_db_name}.dataset_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.dataset_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.subject) subjects AS subject;
+CREATE TABLE ${stats_db_name}.dataset_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false;
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql
index ca1059cc8..2c4a625e1 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql
@@ -17,20 +17,20 @@ from ${openaire_db_name}.software s
where s.datainfo.deletedbyinference=false;
-- Software_citations
-CREATE TABLE ${stats_db_name}.software_citations AS SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS RESULT FROM ${openaire_db_name}.software s LATERAL VIEW explode(s.extrainfo) citations as citation where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="";
+CREATE TABLE ${stats_db_name}.software_citations AS SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS RESULT FROM ${openaire_db_name}.software s LATERAL VIEW explode(s.extrainfo) citations as citation where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and s.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.software_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype;
+CREATE TABLE ${stats_db_name}.software_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.software_concepts AS SELECT substr(p.id, 4) AS id, contexts.context.id AS concept FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.context) contexts AS context;
+CREATE TABLE ${stats_db_name}.software_concepts AS SELECT substr(p.id, 4) AS id, contexts.context.id AS concept FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.context) contexts AS context where p.datainfo.deletedbyinference=false;
CREATE TABLE ${stats_db_name}.software_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
-FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance) instances AS instance) p LEFT OUTER JOIN
+FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance) instances AS instance where p.datainfo.deletedbyinference=false) p LEFT OUTER JOIN
(SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false) d ON p.datasource = d.id;
-CREATE TABLE ${stats_db_name}.software_languages AS select substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.software p;
+CREATE TABLE ${stats_db_name}.software_languages AS select substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.software p where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.software_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.originalid) oids AS ids;
+CREATE TABLE ${stats_db_name}.software_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.software_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.pid) pids AS ppid;
+CREATE TABLE ${stats_db_name}.software_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.software_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.subject) subjects AS subject;
+CREATE TABLE ${stats_db_name}.software_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false;
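The software_datasources statement is slightly different: the deleted-by-inference filter has to sit inside the derived table, before the LEFT OUTER JOIN, so that both the exploded products and the datasources they are matched against exclude inferred deletions. A hedged sketch of that shape follows; example_datasources is a hypothetical table name.

    -- Sketch of the *_datasources pattern: filter both join inputs.
    CREATE TABLE ${stats_db_name}.example_datasources AS      -- hypothetical table name
    SELECT p.id,
           CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
    FROM (SELECT substr(p.id, 4)                    AS id,
                 substr(i.instance.hostedby.key, 4) AS datasource
          FROM ${openaire_db_name}.software p
               LATERAL VIEW explode(p.instance) i AS instance
          WHERE p.datainfo.deletedbyinference = false) p      -- filter the products
    LEFT OUTER JOIN
         (SELECT substr(d.id, 4) AS id
          FROM ${openaire_db_name}.datasource d
          WHERE d.datainfo.deletedbyinference = false) d      -- filter the datasources
    ON p.datasource = d.id;

Keeping the join as a LEFT OUTER JOIN preserves products whose hosting datasource is unknown; they fall into the 'other' bucket instead of being dropped.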
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql
index b4fb5aec6..1fa5df8cb 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql
@@ -17,21 +17,20 @@ FROM ${openaire_db_name}.otherresearchproduct o
WHERE o.datainfo.deletedbyinference=FALSE;
-- Otherresearchproduct_citations
-CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS RESULT FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="";
+CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS RESULT FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and o.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype;
-
-CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS SELECT substr(p.id, 4) AS id, contexts.context.id AS concept FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context;
+CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference=false;
+CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS SELECT substr(p.id, 4) AS id, contexts.context.id AS concept FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context where p.datainfo.deletedbyinference=false;
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
-from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance) p LEFT OUTER JOIN
+from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance where p.datainfo.deletedbyinference=false) p LEFT OUTER JOIN
(SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
-CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.otherresearchproduct p;
+CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.otherresearchproduct p where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids;
+CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid;
+CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false;
-CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject;
+CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false;
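The *_citations tables use a different extraction: citation identifiers are pulled out of the extrainfo XML with the xpath_string UDF, and the new deletedbyinference guard is ANDed with the existing non-empty check. A hedged sketch of the shape, with example_citations as a hypothetical table name:

    -- Sketch of the *_citations pattern: xpath extraction plus the new filter.
    CREATE TABLE ${stats_db_name}.example_citations AS        -- hypothetical table name
    SELECT substr(o.id, 4) AS id,
           xpath_string(c.value, "//citation/id[@type='openaire']/@value") AS result
    FROM ${openaire_db_name}.otherresearchproduct o
         LATERAL VIEW explode(o.extrainfo) cits AS c
    WHERE xpath_string(c.value, "//citation/id[@type='openaire']/@value") != ""
      AND o.datainfo.deletedbyinference = false;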
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
index 174d78901..20eec37dc 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
@@ -46,7 +46,7 @@
-
+
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
@@ -237,6 +237,17 @@
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
+
+
+
+
+
+
+ ${hive_jdbc_url}
+
+ stats_db_name=${stats_db_name}
+ openaire_db_name=${openaire_db_name}
+
@@ -259,12 +270,26 @@
impala-shell.sh
${stats_db_name}
step18.sql
- /user/${wf:user()}/oa/graph/stats/oozie_app/scripts/step18.sql
+ ${wf:appPath()}/scripts/step18.sql
+ impala-shell.sh
+
+
+
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+ impala-shell.sh
+ ${stats_db_shadow_name}
+ step19.sql
+ ${wf:appPath()}/scripts/step19.sql
impala-shell.sh
-
+
diff --git a/pom.xml b/pom.xml
index 03c69108d..e9b90a765 100644
--- a/pom.xml
+++ b/pom.xml
@@ -50,7 +50,7 @@