Compare commits

...

9 Commits

Author SHA1 Message Date
Lampros Smyrnaios ba533d9f34 Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into convert_hive_to_spark_actions 2024-04-18 15:47:56 +03:00
Lampros Smyrnaios d46b78b659 dhp-stats-update:
- Set Steps 2-7 and 9 to limit the amount of files generated by Spark, from 8000, down to 100, to improve file-transfer and querying performance.
- Allow the workflow to run up to Step10. The Step11 seems to have some issues even when using hive-action.
2024-04-18 15:40:27 +03:00
Lampros Smyrnaios 6f2ebb2a52 Revert Step8 and Step11 to use Hive again, since their "UPDATE" statements are not supported by Spark. 2024-04-18 15:35:03 +03:00
Antonis Lempesis 0c71c58df6 fixed the definition of gold_oa 2024-04-18 12:01:27 +03:00
Antonis Lempesis 43d05dbebb fixed the definition of result_country 2024-04-18 11:53:50 +03:00
Antonis Lempesis e728a0897c fixed the definition of indi_pub_bronze_oa 2024-04-18 11:07:55 +03:00
Antonis Lempesis 308ae580a9 slight optimization in indi_pub_gold_oa definition 2024-04-18 10:57:52 +03:00
Antonis Lempesis 27d22bd8f9 slight optimization in indi_pub_gold_oa definition 2024-04-17 23:59:52 +03:00
Antonis Lempesis 1f5aba12fa slight optimization in indi_pub_gold_oa definition 2024-04-17 23:54:23 +03:00
11 changed files with 148 additions and 114 deletions

View File

@ -10,7 +10,7 @@ SET harvested='true'
WHERE datasource_tmp.id IN (SELECT DISTINCT d.id
FROM ${stats_db_name}.datasource_tmp d,
${stats_db_name}.result_datasources rd
WHERE d.id = rd.datasource); /*EOS*/
WHERE d.id = rd.datasource); -- /*EOS*/
-- Project temporary table update and final project table creation with final updates that can not be applied to ORC tables
UPDATE ${stats_db_name}.project_tmp
@ -19,9 +19,9 @@ WHERE project_tmp.id IN (SELECT pr.id
FROM ${stats_db_name}.project_results pr,
${stats_db_name}.result r
WHERE pr.result = r.id
AND r.type = 'publication'); /*EOS*/
AND r.type = 'publication'); -- /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.project purge; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.project purge; -- /*EOS*/
CREATE TABLE ${stats_db_name}.project stored as parquet as
SELECT p.id,
@ -64,7 +64,7 @@ FROM ${stats_db_name}.project_tmp p
AND r.type = 'publication'
AND datediff(to_date(r.date), to_date(pp.enddate)) > 0
GROUP BY pp.id) AS prr2
ON prr2.id = p.id; /*EOS*/
ON prr2.id = p.id; -- /*EOS*/
UPDATE ${stats_db_name}.publication_tmp
SET delayed = 'yes'
@ -74,7 +74,7 @@ WHERE publication_tmp.id IN (SELECT distinct r.id
${stats_db_name}.project_tmp p
WHERE r.id = pr.result
AND pr.id = p.id
AND to_date(r.date) - to_date(p.enddate) > 0); /*EOS*/
AND to_date(r.date) - to_date(p.enddate) > 0); -- /*EOS*/
UPDATE ${stats_db_name}.dataset_tmp
SET delayed = 'yes'
@ -84,7 +84,7 @@ WHERE dataset_tmp.id IN (SELECT distinct r.id
${stats_db_name}.project_tmp p
WHERE r.id = pr.result
AND pr.id = p.id
AND to_date(r.date) - to_date(p.enddate) > 0); /*EOS*/
AND to_date(r.date) - to_date(p.enddate) > 0); -- /*EOS*/
UPDATE ${stats_db_name}.software_tmp
SET delayed = 'yes'
@ -94,7 +94,7 @@ WHERE software_tmp.id IN (SELECT distinct r.id
${stats_db_name}.project_tmp p
WHERE r.id = pr.result
AND pr.id = p.id
AND to_date(r.date) - to_date(p.enddate) > 0); /*EOS*/
AND to_date(r.date) - to_date(p.enddate) > 0); -- /*EOS*/
UPDATE ${stats_db_name}.otherresearchproduct_tmp
SET delayed = 'yes'
@ -104,7 +104,7 @@ WHERE otherresearchproduct_tmp.id IN (SELECT distinct r.id
${stats_db_name}.project_tmp p
WHERE r.id = pr.result
AND pr.id = p.id
AND to_date(r.date) - to_date(p.enddate) > 0); /*EOS*/
AND to_date(r.date) - to_date(p.enddate) > 0); -- /*EOS*/
CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS
SELECT result_projects.id AS result,
@ -117,4 +117,4 @@ FROM ${stats_db_name}.result_projects,
${stats_db_name}.project
WHERE result_projects.id = result.id
AND result.type = 'publication'
AND project.id = result_projects.project; /*EOS*/
AND project.id = result_projects.project; -- /*EOS*/

View File

@ -242,12 +242,14 @@ create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet a
select id, issn_online as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_online) foo
)
SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
FROM ${stats_db_name}.publication_datasources pd
FROM ${stats_db_name}.publication pd
left outer join (
select pd.id, 1 as is_gold
FROM ${stats_db_name}.publication_datasources pd
join dd on dd.id=pd.datasource
left outer join ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') tmp on tmp.id=pd.id; /*EOS*/
left semi join dd on dd.id=pd.datasource
union all
select ra.id, 1 as is_gold
from ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') tmp on tmp.id=pd.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as parquet as
@ -282,14 +284,17 @@ create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as
drop table if exists ${stats_db_name}.indi_pub_hybrid purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
select distinct pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication pd
select distinct p.id, coalesce(is_hybrid, 0) is_hybrid
from ${stats_db_name}.publication p
left outer join (
select pd.id, 1 as is_hybrid from ${stats_db_name}.publication pd
join ${stats_db_name}.result_instance ri on ri.id=pd.id
join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id
join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
select p.id, 1 as is_hybrid
from ${stats_db_name}.publication p
join ${stats_db_name}.result_instance ri on ri.id=p.id
join ${stats_db_name}.datasource d on d.id=ri.hostedby
where indi_gold.is_gold=0 and ((d.type like '%Journal%' and ri.accessright!='Closed Access' and ri.accessright!='Restricted' and ri.license is not null) or ra.accessroute='hybrid')) tmp on pd.id=tmp.id; /*EOS*/
join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=p.id
left outer join ${stats_db_name}.result_accessroute ra on ra.id=p.id
where indi_gold.is_gold=0 and
((d.type like '%Journal%' and ri.accessright not in ('Closed Access', 'Restricted', 'Not Available') and ri.license is not null) or ra.accessroute='hybrid')) tmp on pd.i=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_org_fairness purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet as
@ -661,17 +666,18 @@ drop view pub_fos_totals; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_bronze_oa purge; /*EOS*/
create table ${stats_db_name}.indi_pub_bronze_oa stored as parquet as
select distinct pd.id,coalesce(is_bronze_oa,0) is_bronze_oa from ${stats_db_name}.publication pd
left outer join (select pd.id, 1 as is_bronze_oa from ${stats_db_name}.publication pd
join ${stats_db_name}.result_instance ri on ri.id=pd.id
join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id
join ${stats_db_name}.indi_pub_hybrid indi_hybrid on indi_hybrid.id=pd.id
join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
join ${stats_db_name}.datasource d on d.id=ri.hostedby
where indi_gold.is_gold=0 and indi_hybrid.is_hybrid=0
and ((d.type like '%Journal%' and ri.accessright!='Closed Access'
and ri.accessright!='Restricted' and ri.license is null) or ra.accessroute='bronze')) tmp
on pd.id=tmp.id; /*EOS*/
select distinct p.id,coalesce(is_bronze_oa,0) is_bronze_oa
from ${stats_db_name}.publication p
left outer join (
select p.id, 1 as is_bronze_oa
from ${stats_db_name}.publication p
join ${stats_db_name}.result_instance ri on ri.id=p.id
join ${stats_db_name}.datasource d on d.id=ri.hostedby
join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=p.id
join ${stats_db_name}.indi_pub_hybrid indi_hybrid on indi_hybrid.id=p.id
left outer join ${stats_db_name}.result_accessroute ra on ra.id=p.id
where indi_gold.is_gold=0 and indi_hybrid.is_hybrid=0
and ((d.type like '%Journal%' and ri.accessright not in ('Closed Access', 'Restricted', 'Not Available') and ri.license is null) or ra.accessroute='bronze')) tmp on p.id=tmp.id; /*EOS*/
CREATE TEMPORARY VIEW project_year_result_year as
select p.id project_id, acronym, r.id result_id, r.year, p.end_year
@ -1002,13 +1008,18 @@ left outer join (
drop table if exists ${stats_db_name}.result_country purge; /*EOS*/
create table ${stats_db_name}.result_country stored as parquet as
select distinct ro.id, coalesce(o.country, f.country)
from ${stats_db_name}.result_organization ro
left outer join ${stats_db_name}.organization o on o.id=ro.organization
left outer join ${stats_db_name}.result_projects rp on rp.id=ro.id
left outer join ${stats_db_name}.project p on p.id=rp.project
left outer join ${stats_db_name}.funder f on f.name=p.funder
where coalesce(o.country, f.country) IS NOT NULL;
select distinct *
from (
select ro.id, o.country
from ${stats_db_name}.result_organization ro
left outer join ${stats_db_name}.organization o on o.id=ro.organization
union all
select rp.id, f.country
from ${stats_db_name}.result_projects
left outer join ${stats_db_name}.project p on p.id=rp.project
left outer join ${stats_db_name}.funder f on f.name=p.funder
) rc
where rc.country is not null; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_oa_with_license purge; /*EOS*/
create table ${stats_db_name}.indi_result_oa_with_license stored as parquet as

View File

@ -44,7 +44,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.publication_classifications purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, instancetype.classname as type
SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, instancetype.classname as type
from ${openaire_db_name}.publication p
LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -52,7 +52,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.publication_concepts purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case
SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
@ -63,7 +63,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.publication_datasources purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_datasources STORED AS PARQUET as
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
SELECT /*+ COALESCE(100) */ p.id, case when d.id is null then 'other' else p.datasource end as datasource
FROM (
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource
from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance
@ -76,14 +76,14 @@ FROM (
DROP TABLE IF EXISTS ${stats_db_name}.publication_languages purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_languages STORED AS PARQUET AS
select substr(p.id, 4) as id, p.language.classname as language
select /*+ COALESCE(100) */ substr(p.id, 4) as id, p.language.classname as language
FROM ${openaire_db_name}.publication p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.publication_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -91,7 +91,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.publication_pids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -99,7 +99,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.publication_topics purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_topics STORED AS PARQUET as
select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
select /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -107,7 +107,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.publication_citations purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_citations STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.publication p
lateral view explode(p.extrainfo) citations AS citation
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""

View File

@ -45,7 +45,7 @@ WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.dataset_citations purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_citations STORED AS PARQUET AS
SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
SELECT /*+ COALESCE(100) */ substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.dataset d
LATERAL VIEW explode(d.extrainfo) citations AS citation
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
@ -54,7 +54,7 @@ WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !=
DROP TABLE IF EXISTS ${stats_db_name}.dataset_classifications purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -62,7 +62,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.dataset_concepts purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case
SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
@ -73,7 +73,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.dataset_datasources purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS
SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
SELECT /*+ COALESCE(100) */ p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
FROM (
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource
FROM ${openaire_db_name}.dataset p
@ -87,14 +87,14 @@ FROM (
DROP TABLE IF EXISTS ${stats_db_name}.dataset_languages purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, p.language.classname AS language
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.dataset p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.dataset_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -102,7 +102,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.dataset_pids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -110,7 +110,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.dataset_topics purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/

View File

@ -44,7 +44,7 @@ where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.software_citations purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_citations STORED AS PARQUET AS
SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
SELECT /*+ COALESCE(100) */ substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.software s
LATERAL VIEW explode(s.extrainfo) citations as citation
where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
@ -53,7 +53,7 @@ where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !=
DROP TABLE IF EXISTS ${stats_db_name}.software_classifications purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -61,7 +61,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.software_concepts purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case
SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
@ -72,7 +72,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.software_datasources purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
SELECT /*+ COALESCE(100) */ p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
FROM (
SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
FROM ${openaire_db_name}.software p
@ -86,14 +86,14 @@ FROM (
DROP TABLE IF EXISTS ${stats_db_name}.software_languages purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS
select substr(p.id, 4) AS id, p.language.classname AS language
select /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.software p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -101,7 +101,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.software_pids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -109,7 +109,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.software_topics purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/

View File

@ -45,7 +45,7 @@ WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_citations purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_citations STORED AS PARQUET AS
SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
SELECT /*+ COALESCE(100) */ substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; /*EOS*/
@ -53,14 +53,14 @@ WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !=
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_classifications purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_concepts purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case
SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
@ -70,7 +70,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_datasources purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
SELECT /*+ COALESCE(100) */ p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
@ -81,27 +81,27 @@ FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) A
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_languages purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, p.language.classname AS language
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.otherresearchproduct p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_pids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_topics purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/

View File

@ -6,14 +6,14 @@
DROP TABLE IF EXISTS ${stats_db_name}.project_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.project_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.project_organizations purge; /*EOS*/
CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization
SELECT /*+ COALESCE(100) */ substr(r.source, 4) AS id, substr(r.target, 4) AS organization
from ${openaire_db_name}.relation r
WHERE r.reltype = 'projectOrganization' and r.source like '40|%'
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
@ -21,7 +21,7 @@ WHERE r.reltype = 'projectOrganization' and r.source like '40|%'
DROP TABLE IF EXISTS ${stats_db_name}.project_results purge; /*EOS*/
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
SELECT /*+ COALESCE(100) */ substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'resultProject' and r.target like '40|%'
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
@ -29,7 +29,7 @@ WHERE r.reltype = 'resultProject' and r.target like '40|%'
DROP TABLE IF EXISTS ${stats_db_name}.project_classification purge; /*EOS*/
create table ${stats_db_name}.project_classification STORED AS PARQUET as
select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
select /*+ COALESCE(100) */ substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
from ${openaire_db_name}.project p
lateral view explode(p.h2020classification) classifs as class
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null; /*EOS*/
@ -93,7 +93,7 @@ WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.funder purge; /*EOS*/
create table ${stats_db_name}.funder STORED AS PARQUET as
select distinct xpath_string(fund, '//funder/id') as id,
select /*+ COALESCE(100) */ distinct xpath_string(fund, '//funder/id') as id,
xpath_string(fund, '//funder/name') as name,
xpath_string(fund, '//funder/shortname') as shortname,
xpath_string(fundingtree[0].value, '//funder/jurisdiction') as country
@ -102,7 +102,7 @@ from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fun
DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge; /*EOS*/
CREATE TABLE ${stats_db_name}.project_organization_contribution STORED AS PARQUET AS
SELECT distinct substr(r.source, 4) AS project, substr(r.target, 4) AS organization,
SELECT /*+ COALESCE(100) */ distinct substr(r.source, 4) AS project, substr(r.target, 4) AS organization,
properties[0].value contribution, properties[1].value currency
from ${openaire_db_name}.relation r
LATERAL VIEW explode (r.properties) properties

View File

@ -130,7 +130,7 @@ with
lvl1 as (select id, topic from ${stats_db_name}.result_topics where topic like '__ %' and type='Fields of Science and Technology classification'),
lvl2 as (select id, topic from ${stats_db_name}.result_topics where topic like '____ %' and type='Fields of Science and Technology classification'),
lvl3 as (select id, topic from ${stats_db_name}.result_topics where topic like '______ %' and type='Fields of Science and Technology classification')
select lvl1.id, lvl1.topic as lvl1, lvl2.topic as lvl2, lvl3.topic as lvl3
select /*+ COALESCE(100) */ lvl1.id, lvl1.topic as lvl1, lvl2.topic as lvl2, lvl3.topic as lvl3
from lvl1
join lvl2 on lvl1.id=lvl2.id and substr(lvl2.topic, 1, 2)=substr(lvl1.topic, 1, 2)
join lvl3 on lvl3.id=lvl1.id and substr(lvl3.topic, 1, 4)=substr(lvl2.topic, 1, 4); /*EOS*/
@ -138,7 +138,7 @@ from lvl1
DROP TABLE IF EXISTS ${stats_db_name}.result_organization purge; /*EOS*/
CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
SELECT /*+ COALESCE(100) */ substr(r.target, 4) AS id, substr(r.source, 4) AS organization
FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'resultOrganization'
and r.target like '50|%'
@ -147,8 +147,7 @@ WHERE r.reltype = 'resultOrganization'
DROP TABLE IF EXISTS ${stats_db_name}.result_projects purge; /*EOS*/
CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
select /*+ COALESCE(100) */ pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
FROM ${stats_db_name}.result r
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id; /*EOS*/
JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id; /*EOS*/

View File

@ -5,7 +5,7 @@
-- Datasource table/view and Datasource related tables/views
------------------------------------------------------------
------------------------------------------------------------
DROP TABLE IF EXISTS ${stats_db_name}.datasource_tmp purge; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.datasource_tmp purge; -- /*EOS*/
CREATE TABLE ${stats_db_name}.datasource_tmp
(
@ -23,6 +23,7 @@ CREATE TABLE ${stats_db_name}.datasource_tmp
issn_printed STRING,
issn_online STRING
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); /*EOS*/
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); -- /*EOS*/
-- Insert statement that takes into account the piwik_id of the openAIRE graph
INSERT INTO ${stats_db_name}.datasource_tmp
@ -46,16 +47,16 @@ FROM ${openaire_db_name}.datasource d1
LATERAL VIEW EXPLODE(originalid) temp AS originalidd
WHERE originalidd like "piwik:%") AS d2
ON d1.id = d2.id
WHERE d1.datainfo.deletedbyinference = FALSE and d1.datainfo.invisible=false; /*EOS*/
WHERE d1.datainfo.deletedbyinference = FALSE and d1.datainfo.invisible=false; -- /*EOS*/
-- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table.
-- Creating a temporary dual table that will be removed after the following insert
DROP TABLE IF EXISTS ${stats_db_name}.dual purge; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.dual purge; -- /*EOS*/
CREATE TABLE ${stats_db_name}.dual ( dummy CHAR(1)); /*EOS*/
CREATE TABLE ${stats_db_name}.dual ( dummy CHAR(1)); -- /*EOS*/
INSERT INTO ${stats_db_name}.dual VALUES ('X'); /*EOS*/
INSERT INTO ${stats_db_name}.dual VALUES ('X'); -- /*EOS*/
INSERT INTO ${stats_db_name}.datasource_tmp (`id`, `name`, `type`, `dateofvalidation`, `yearofvalidation`, `harvested`,
`piwik_id`, `latitude`, `longitude`, `websiteurl`, `compatibility`, `issn_printed`, `issn_online`)
@ -73,42 +74,42 @@ SELECT 'other',
null,
null
FROM ${stats_db_name}.dual
WHERE 'other' not in (SELECT id FROM ${stats_db_name}.datasource_tmp WHERE name = 'Unknown Repository'); /*EOS*/
DROP TABLE ${stats_db_name}.dual; /*EOS*/
WHERE 'other' not in (SELECT id FROM ${stats_db_name}.datasource_tmp WHERE name = 'Unknown Repository'); -- /*EOS*/
DROP TABLE ${stats_db_name}.dual; -- /*EOS*/
UPDATE ${stats_db_name}.datasource_tmp SET name='Other' WHERE name = 'Unknown Repository'; /*EOS*/
UPDATE ${stats_db_name}.datasource_tmp SET yearofvalidation=null WHERE yearofvalidation = '-1'; /*EOS*/
UPDATE ${stats_db_name}.datasource_tmp SET name='Other' WHERE name = 'Unknown Repository'; -- /*EOS*/
UPDATE ${stats_db_name}.datasource_tmp SET yearofvalidation=null WHERE yearofvalidation = '-1'; -- /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages purge; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages purge; -- /*EOS*/
CREATE TABLE ${stats_db_name}.datasource_languages STORED AS PARQUET AS
SELECT substr(d.id, 4) AS id, langs.languages AS language
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; /*EOS*/
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; -- /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.datasource_oids purge; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.datasource_oids purge; -- /*EOS*/
CREATE TABLE ${stats_db_name}.datasource_oids STORED AS PARQUET AS
SELECT substr(d.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; /*EOS*/
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; -- /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.datasource_organizations purge; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.datasource_organizations purge; -- /*EOS*/
CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.source like '20|%' and r.datainfo.invisible=false; /*EOS*/
WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.source like '20|%' and r.datainfo.invisible=false; -- /*EOS*/
-- datasource sources:
-- where the datasource info have been collected from.
DROP TABLE IF EXISTS ${stats_db_name}.datasource_sources purge; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.datasource_sources purge; -- /*EOS*/
create table if not exists ${stats_db_name}.datasource_sources STORED AS PARQUET AS
select substr(d.id, 4) as id, substr(cf.key, 4) as datasource
from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf
where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; /*EOS*/
where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; -- /*EOS*/
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
SELECT datasource AS id, id AS result
FROM ${stats_db_name}.result_datasources; /*EOS*/
FROM ${stats_db_name}.result_datasources; -- /*EOS*/

View File

@ -6,7 +6,7 @@
DROP TABLE IF EXISTS ${stats_db_name}.organization purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization STORED AS PARQUET AS
SELECT substr(o.id, 4) as id,
SELECT /*+ COALESCE(100) */ substr(o.id, 4) as id,
o.legalname.value as name,
o.legalshortname.value as legalshortname,
o.country.classid as country

View File

@ -303,12 +303,11 @@
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
</spark>
<!-- <ok to="Step8"/>-->
<ok to="End"/>
<ok to="Step8"/>
<error to="Kill"/>
</action>
<action name="Step8">
<!-- <action name="Step8">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
@ -316,18 +315,29 @@
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
<jar>dhp-stats-update-${projectVersion}.jar</jar>
<spark-opts>
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
&#45;&#45;conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
${sparkClusterOpts}
${sparkResourceOpts}
${sparkApplicationOpts}
</spark-opts>
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql</arg>
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
<arg>&#45;&#45;hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
<arg>&#45;&#45;sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql</arg>
<arg>&#45;&#45;stats_db_name</arg><arg>${stats_db_name}</arg>
<arg>&#45;&#45;openaire_db_name</arg><arg>${openaire_db_name}</arg>
</spark>
<ok to="Step9"/>
<error to="Kill"/>
</action>-->
<action name="Step8">
<hive2 xmlns="uri:oozie:hive2-action:0.1">
<jdbc-url>${hive_jdbc_url}</jdbc-url>
<script>scripts/step8.sql</script>
<param>stats_db_name=${stats_db_name}</param>
<param>openaire_db_name=${openaire_db_name}</param>
</hive2>
<ok to="Step9"/>
<error to="Kill"/>
</action>
<action name="Step9">
@ -371,11 +381,12 @@
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
<arg>--external_stats_db_name</arg><arg>${external_stats_db_name}</arg>
</spark>
<ok to="Step11"/>
<!-- <ok to="Step11"/>-->
<ok to="End"/>
<error to="Kill"/>
</action>
<action name="Step11">
<!-- <action name="Step11">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
@ -383,19 +394,31 @@
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
<jar>dhp-stats-update-${projectVersion}.jar</jar>
<spark-opts>
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
&#45;&#45;conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
${sparkClusterOpts}
${sparkResourceOpts}
${sparkApplicationOpts}
</spark-opts>
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql</arg>
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
<arg>--external_stats_db_name</arg><arg>${external_stats_db_name}</arg>
<arg>&#45;&#45;hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
<arg>&#45;&#45;sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql</arg>
<arg>&#45;&#45;stats_db_name</arg><arg>${stats_db_name}</arg>
<arg>&#45;&#45;openaire_db_name</arg><arg>${openaire_db_name}</arg>
<arg>&#45;&#45;external_stats_db_name</arg><arg>${external_stats_db_name}</arg>
</spark>
<ok to="Step12"/>
<error to="Kill"/>
</action>-->
<action name="Step11">
<hive2 xmlns="uri:oozie:hive2-action:0.1">
<jdbc-url>${hive_jdbc_url}</jdbc-url>
<script>scripts/step11.sql</script>
<param>stats_db_name=${stats_db_name}</param>
<param>openaire_db_name=${openaire_db_name}</param>
<param>external_stats_db_name=${external_stats_db_name}</param>
</hive2>
<ok to="Step12"/>
<error to="Kill"/>
</action>
<action name="Step12">