diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java
new file mode 100644
index 0000000000..c7cae1fcbd
--- /dev/null
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java
@@ -0,0 +1,63 @@
+
+package eu.dnetlib.doiboost.crossref;
+
+import java.io.BufferedOutputStream;
+import java.net.URI;
+import java.util.zip.GZIPOutputStream;
+
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.mortbay.log.Log;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+public class ExtractCrossrefRecords {
+ public static void main(String[] args) throws Exception {
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+ IOUtils
+ .toString(
+ ExtractCrossrefRecords.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json")));
+ parser.parseArgument(args);
+ final String hdfsServerUri = parser.get("hdfsServerUri");
+ final String workingPath = parser.get("workingPath");
+ final String outputPath = parser.get("outputPath");
+ final String crossrefFileNameTarGz = parser.get("crossrefFileNameTarGz");
+
+ Path hdfsreadpath = new Path(hdfsServerUri.concat(crossrefFileNameTarGz));
+ Configuration conf = new Configuration();
+ conf.set("fs.defaultFS", hdfsServerUri.concat(workingPath));
+ conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
+ conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
+ FileSystem fs = FileSystem.get(URI.create(hdfsServerUri.concat(workingPath)), conf);
+ FSDataInputStream crossrefFileStream = fs.open(hdfsreadpath);
+ try (TarArchiveInputStream tais = new TarArchiveInputStream(
+ new GzipCompressorInputStream(crossrefFileStream))) {
+ TarArchiveEntry entry = null;
+ while ((entry = tais.getNextTarEntry()) != null) {
+ if (!entry.isDirectory()) {
+ try (
+ FSDataOutputStream out = fs
+ .create(new Path(outputPath.concat(entry.getName()).concat(".gz")));
+ GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) {
+
+ IOUtils.copy(tais, gzipOs);
+
+ }
+
+ }
+ }
+ }
+ Log.info("Crossref dump reading completed");
+
+ }
+}
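
ExtractCrossrefRecords above treats the Crossref dump as a single tar.gz whose entries it re-compresses one by one into separate .gz files, so the downstream Spark job can process them as individual inputs. A minimal sketch, assuming a local copy of such a dump, that lists its entries with the same commons-compress classes (the object name and the local-file handling are illustrative, not part of this patch):

    // Illustrative only: verify the dump layout ExtractCrossrefRecords expects,
    // i.e. a gzipped tar whose entries are the record files to be re-compressed.
    import java.io.{BufferedInputStream, FileInputStream}

    import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
    import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream

    object ListCrossrefDumpEntries {
      def main(args: Array[String]): Unit = {
        val in = new TarArchiveInputStream(
          new GzipCompressorInputStream(new BufferedInputStream(new FileInputStream(args(0)))))
        try {
          Iterator
            .continually(in.getNextTarEntry) // returns null once the archive is exhausted
            .takeWhile(_ != null)
            .filterNot(_.isDirectory)
            .foreach(e => println(s"${e.getName}\t${e.getSize} bytes"))
        } finally in.close()
      }
    }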
diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala
new file mode 100644
index 0000000000..e48f68a7f7
--- /dev/null
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala
@@ -0,0 +1,71 @@
+package eu.dnetlib.doiboost.crossref
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
+import org.json4s
+import org.json4s.DefaultFormats
+import org.json4s.JsonAST.JArray
+import org.json4s.jackson.JsonMethods.{compact, parse, render}
+import org.slf4j.{Logger, LoggerFactory}
+
+import scala.io.Source
+
+object GenerateCrossrefDataset {
+
+ val log: Logger = LoggerFactory.getLogger(GenerateCrossrefDataset.getClass)
+
+ implicit val mrEncoder: Encoder[CrossrefDT] = Encoders.kryo[CrossrefDT]
+
+ def extractDump(input:String):List[String] = {
+ implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
+ lazy val json: json4s.JValue = parse(input)
+
+ val a = (json \ "items").extract[JArray]
+ a.arr.map(s => compact(render(s)))
+ }
+
+
+ def crossrefElement(meta: String): CrossrefDT = {
+ implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
+ lazy val json: json4s.JValue = parse(meta)
+ val doi:String = (json \ "DOI").extract[String]
+ val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long]
+ CrossrefDT(doi, meta, timestamp)
+
+ }
+
+ def main(args: Array[String]): Unit = {
+ val conf = new SparkConf
+ val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json")).mkString)
+ parser.parseArgument(args)
+ val master = parser.get("master")
+ val sourcePath = parser.get("sourcePath")
+ val targetPath = parser.get("targetPath")
+
+ val spark: SparkSession = SparkSession.builder().config(conf)
+ .appName(GenerateCrossrefDataset.getClass.getSimpleName)
+ .master(master)
+ .getOrCreate()
+ val sc: SparkContext = spark.sparkContext
+
+    import spark.implicits._
+
+    sc.wholeTextFiles(sourcePath, 6000)
+      .flatMap(d => extractDump(d._2))
+      .map(meta => crossrefElement(meta))
+      .toDS()
+      .write.mode(SaveMode.Overwrite).save(targetPath)
+
+ }
+
+}
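
GenerateCrossrefDataset relies on a CrossrefDT type that is not included in this diff. A minimal sketch of the shape crossrefElement appears to assume (the actual case class is defined elsewhere in dhp-doiboost and may carry more fields or different names):

    // Assumed shape only: DOI, the original JSON record, and the 'indexed' timestamp.
    case class CrossrefDT(doi: String, json: String, timestamp: Long)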
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json
new file mode 100644
index 0000000000..b0222d422f
--- /dev/null
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json
@@ -0,0 +1,8 @@
+[
+  {"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the HDFS server URI", "paramRequired": true},
+  {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the default working path", "paramRequired": true},
+  {"paramName":"f", "paramLongName":"crossrefFileNameTarGz", "paramDescription": "the name of the Crossref dump tar.gz file", "paramRequired": true},
+  {"paramName":"issm", "paramLongName":"isSparkSessionManaged", "paramDescription": "when true, the spark session is managed by the framework", "paramRequired": false},
+  {"paramName":"o", "paramLongName":"outputPath", "paramDescription": "the path where the extracted record files are stored", "paramRequired": true}
+]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json
new file mode 100644
index 0000000000..63e0803372
--- /dev/null
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json
@@ -0,0 +1,21 @@
+[
+  {
+    "paramName": "s",
+    "paramLongName": "sourcePath",
+    "paramDescription": "the path of the extracted Crossref dump records",
+    "paramRequired": true
+  },
+  {
+    "paramName": "t",
+    "paramLongName": "targetPath",
+    "paramDescription": "the path where the Crossref dataset is stored",
+    "paramRequired": true
+  },
+  {
+    "paramName": "m",
+    "paramLongName": "master",
+    "paramDescription": "the master name",
+    "paramRequired": true
+  }
+]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml
new file mode 100644
index 0000000000..508202e301
--- /dev/null
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml
@@ -0,0 +1,42 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value>
+    </property>
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>hive_metastore_uris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>spark2YarnHistoryServerAddress</name>
+        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
+    </property>
+    <property>
+        <name>spark2EventLogDir</name>
+        <value>/user/spark/spark2ApplicationHistory</value>
+    </property>
+    <property>
+        <name>spark2ExtraListeners</name>
+        <value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
+    </property>
+    <property>
+        <name>spark2SqlQueryExecutionListeners</name>
+        <value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
+    </property>
+</configuration>
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml
new file mode 100644
index 0000000000..c7dc8bed45
--- /dev/null
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml
@@ -0,0 +1,92 @@
+<workflow-app name="crossref_dump_reader" xmlns="uri:oozie:workflow:0.5">
+    <parameters>
+        <property>
+            <name>crossrefDumpPath</name>
+            <description>the Crossref dump path</description>
+        </property>
+        <property>
+            <name>inputPathCrossref</name>
+            <description>the Crossref input path</description>
+        </property>
+        <property>
+            <name>sparkDriverMemory</name>
+            <description>memory for driver process</description>
+        </property>
+        <property>
+            <name>sparkExecutorMemory</name>
+            <description>memory for individual executor</description>
+        </property>
+        <property>
+            <name>sparkExecutorCores</name>
+            <value>2</value>
+            <description>number of cores used by single executor</description>
+        </property>
+    </parameters>
+
+    <start to="ExtractCrossrefDump"/>
+
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+
+    <action name="ExtractCrossrefDump">
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords</main-class>
+            <arg>--hdfsServerUri</arg><arg>${nameNode}</arg>
+            <arg>--crossrefFileNameTarGz</arg><arg>${crossrefDumpPath}/crossref.tar.gz</arg>
+            <arg>--workingPath</arg><arg>${crossrefDumpPath}</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/files/</arg>
+        </java>
+        <ok to="GenerateCrossrefDataset"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="GenerateCrossrefDataset">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>SparkGenerateCrossrefDataset</name>
+            <class>eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset</class>
+            <jar>dhp-doiboost-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+            </spark-opts>
+            <arg>--master</arg><arg>yarn-cluster</arg>
+            <arg>--sourcePath</arg><arg>${workingDir}/files</arg>
+            <arg>--targetPath</arg><arg>${inputPathCrossref}/crossref_ds_updated</arg>
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
+    <end name="End"/>
+</workflow-app>
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml
index 6cb8a577a7..7bd7d107f5 100644
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml
@@ -41,17 +41,21 @@
             <description>the Crossref input path</description>
         </property>
         <property>
-            <name>crossrefTimestamp</name>
-            <description>Timestamp for the Crossref incremental Harvesting</description>
-        </property>
-        <property>
-            <name>esServer</name>
-            <description>elasticsearch server url for the Crossref Harvesting</description>
-        </property>
-        <property>
-            <name>esIndex</name>
-            <description>elasticsearch index name for the Crossref Harvesting</description>
+            <name>crossrefDumpPath</name>
+            <description>the Crossref dump path</description>
+        </property>
+
+
+
+
+
+
+
+
+
+
+
@@ -106,6 +110,7 @@
             <case to="PreprocessORCID">${wf:conf('resumeFrom') eq 'PreprocessORCID'}</case>
             <case to="CreateDOIBoost">${wf:conf('resumeFrom') eq 'CreateDOIBoost'}</case>
             <case to="GenerateActionSet">${wf:conf('resumeFrom') eq 'GenerateActionSet'}</case>
+            <case to="GenerateCrossrefDataset">${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'}</case>
@@ -114,55 +119,104 @@
             <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+
+
+
+
+
+
+
+
+
+
+
+
+
-            <main-class>eu.dnetlib.doiboost.crossref.CrossrefImporter</main-class>
-            <arg>--targetPath</arg><arg>${inputPathCrossref}/index_update</arg>
-            <arg>--namenode</arg><arg>${nameNode}</arg>
-            <arg>--esServer</arg><arg>${esServer}</arg>
-            <arg>--esIndex</arg><arg>${esIndex}</arg>
-            <arg>--timestamp</arg><arg>${crossrefTimestamp}</arg>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords</main-class>
+            <arg>--hdfsServerUri</arg><arg>${nameNode}</arg>
+            <arg>--crossrefFileNameTarGz</arg><arg>${crossrefDumpPath}/crossref.tar.gz</arg>
+            <arg>--workingPath</arg><arg>${crossrefDumpPath}</arg>
+            <arg>--outputPath</arg><arg>${crossrefDumpPath}/files/</arg>
+
+
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>SparkGenerateCrossrefDataset</name>
+            <class>eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset</class>
+            <jar>dhp-doiboost-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=2
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+            </spark-opts>
+            <arg>--master</arg><arg>yarn-cluster</arg>
+            <arg>--sourcePath</arg><arg>${crossrefDumpPath}/files/</arg>
+            <arg>--targetPath</arg><arg>${inputPathCrossref}/crossref_ds</arg>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-            <master>yarn-cluster</master>
-            <mode>cluster</mode>
-            <name>GenerateCrossrefDataset</name>
-            <class>eu.dnetlib.doiboost.crossref.CrossrefDataset</class>
-            <jar>dhp-doiboost-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.sql.shuffle.partitions=3840
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-            </spark-opts>
-            <arg>--workingPath</arg><arg>${inputPathCrossref}</arg>
-            <arg>--master</arg><arg>yarn-cluster</arg>
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java
index f4e783edc6..1d4eca2c26 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java
@@ -94,14 +94,22 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication
.filter(p -> HdfsSupport.exists(p, sc.hadoopConfiguration()))
.toArray(size -> new String[size]);
- spark
- .read()
- .parquet(validPaths)
- .map((MapFunction<Row, String>) r -> enrichRecord(r), Encoders.STRING())
- .toJavaRDD()
- .mapToPair(xml -> new Tuple2<>(new Text(UUID.randomUUID() + ":" + type), new Text(xml)))
- // .coalesce(1)
- .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
+ if (validPaths.length > 0) {
+ spark
+ .read()
+ .parquet(validPaths)
+ .map((MapFunction<Row, String>) r -> enrichRecord(r), Encoders.STRING())
+ .toJavaRDD()
+ .mapToPair(xml -> new Tuple2<>(new Text(UUID.randomUUID() + ":" + type), new Text(xml)))
+ // .coalesce(1)
+ .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
+ } else {
+ spark
+ .emptyDataFrame()
+ .toJavaRDD()
+ .mapToPair(xml -> new Tuple2<>(new Text(), new Text()))
+ .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
+ }
}
private static String enrichRecord(final Row r) {
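
The new branch above keeps MigrateHdfsMdstoresApplication from failing when none of the mdstore paths exist: an empty sequence file is still written, so downstream consumers of outputPath find a valid, if empty, dataset. A rough Scala sketch of that fallback under the same assumptions (names are illustrative; the patch itself stays in Java):

    import org.apache.hadoop.io.Text
    import org.apache.hadoop.mapred.SequenceFileOutputFormat
    import org.apache.spark.sql.SparkSession

    object EmptyMdstoreOutput {
      // Write an empty Text/Text sequence file so consumers of outputPath still
      // find something to read when there is nothing to migrate.
      def write(spark: SparkSession, outputPath: String): Unit =
        spark.sparkContext
          .emptyRDD[(Text, Text)]
          .saveAsHadoopFile(outputPath, classOf[Text], classOf[Text],
            classOf[SequenceFileOutputFormat[Text, Text]])
    }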
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql
index d26169fd6e..b977302df3 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql
@@ -41,7 +41,8 @@ SELECT p.id,
CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.daysForlastPub END AS daysforlastpub,
CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END AS delayedpubs,
p.callidentifier,
- p.code
+ p.code,
+ p.totalcost
FROM ${stats_db_name}.project_tmp p
LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np
FROM ${stats_db_name}.project_results pr
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql
index 528aaff528..3a7d9f4557 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql
@@ -30,10 +30,21 @@ from rcount
group by rcount.pid;
create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture;
---
--- ANALYZE TABLE ${stats_db_name}.result_projectcount COMPUTE STATISTICS;
--- ANALYZE TABLE ${stats_db_name}.result_projectcount COMPUTE STATISTICS FOR COLUMNS;
--- ANALYZE TABLE ${stats_db_name}.result_fundercount COMPUTE STATISTICS;
--- ANALYZE TABLE ${stats_db_name}.result_fundercount COMPUTE STATISTICS FOR COLUMNS;
--- ANALYZE TABLE ${stats_db_name}.project_resultcount COMPUTE STATISTICS;
--- ANALYZE TABLE ${stats_db_name}.project_resultcount COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
+
+create table ${stats_db_name}.result_instance stored as parquet as
+select distinct r.*
+from (
+ select substr(r.id, 4) as id, inst.accessright.classname as accessright, substr(inst.collectedfrom.key, 4) as collectedfrom,
+ substr(inst.hostedby.key, 4) as hostedby, inst.dateofacceptance.value as dateofacceptance, inst.license.value as license, p.qualifier.classname as pidtype, p.value as pid
+ from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view explode(inst.pid) pids as p) r
+join ${stats_db_name}.result res on res.id=r.id;
+
+create table ${stats_db_name}.result_apc as
+select r.id, r.amount, r.currency
+from (
+ select substr(r.id, 4) as id, inst.processingchargeamount.value as amount, inst.processingchargecurrency.value as currency
+ from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
+join ${stats_db_name}.result res on res.id=r.id
+where r.amount is not null;
+
+create view ${stats_db_name}.issn_gold_oa_dataset as select * from stats_ext.issn_gold_oa_dataset;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
index af5e2a6a4d..74aa8536ce 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
@@ -16,7 +16,13 @@ create table TARGET.result as
select distinct * from (
select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id)
union all
- select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) ) foo;
+ select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id)
+ union all
+ select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on p.id=rp.project join SOURCE.project_organizations po on po.id=p.id join SOURCE.organization o on o.id=po.organization where rp.id=r.id and o.name in (
+ 'GEORG-AUGUST-UNIVERSITAT GOTTINGEN STIFTUNG OFFENTLICHEN RECHTS',
+ 'ATHINA-EREVNITIKO KENTRO KAINOTOMIAS STIS TECHNOLOGIES TIS PLIROFORIAS, TON EPIKOINONION KAI TIS GNOSIS',
+ 'Consiglio Nazionale delle Ricerche',
+ 'Universidade do Minho') )) foo;
compute stats TARGET.result;
create table TARGET.result_citations as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
index 23ef03bc93..5d81e97bb9 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
@@ -39,7 +39,8 @@ CREATE TABLE ${stats_db_name}.project_tmp
daysforlastpub INT,
delayedpubs INT,
callidentifier STRING,
- code STRING
+ code STRING,
+ totalcost FLOAT
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true');
INSERT INTO ${stats_db_name}.project_tmp
@@ -62,7 +63,8 @@ SELECT substr(p.id, 4) AS id,
0 AS daysforlastpub,
0 AS delayedpubs,
p.callidentifier.value AS callidentifier,
- p.code.value AS code
+ p.code.value AS code,
+ p.totalcost AS totalcost
FROM ${openaire_db_name}.project p
WHERE p.datainfo.deletedbyinference = false;
@@ -70,15 +72,4 @@ create table ${stats_db_name}.funder as
select distinct xpath_string(fund, '//funder/id') as id,
xpath_string(fund, '//funder/name') as name,
xpath_string(fund, '//funder/shortname') as shortname
-from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund;
-
--- ANALYZE TABLE ${stats_db_name}.project_oids COMPUTE STATISTICS;
--- ANALYZE TABLE ${stats_db_name}.project_oids COMPUTE STATISTICS FOR COLUMNS;
--- ANALYZE TABLE ${stats_db_name}.project_organizations COMPUTE STATISTICS;
--- ANALYZE TABLE ${stats_db_name}.project_organizations COMPUTE STATISTICS FOR COLUMNS;
--- ANALYZE TABLE ${stats_db_name}.project_results COMPUTE STATISTICS;
--- ANALYZE TABLE ${stats_db_name}.project_results COMPUTE STATISTICS FOR COLUMNS;
--- ANALYZE TABLE ${stats_db_name}.project_tmp COMPUTE STATISTICS;
--- ANALYZE TABLE ${stats_db_name}.project_tmp COMPUTE STATISTICS FOR COLUMNS;
--- ANALYZE TABLE ${stats_db_name}.funder COMPUTE STATISTICS;
--- ANALYZE TABLE ${stats_db_name}.funder COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
+from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund;
\ No newline at end of file