Compare commits

...

9 Commits

Author SHA1 Message Date
Lampros Smyrnaios ba533d9f34 Merge branch 'beta' of https://code-repo.d4science.org/antonis.lempesis/dnet-hadoop into convert_hive_to_spark_actions 2024-04-18 15:47:56 +03:00
Lampros Smyrnaios d46b78b659 dhp-stats-update:
- Set Steps 2-7 and 9 to limit the amount of files generated by Spark, from 8000, down to 100, to improve file-transfer and querying performance.
- Allow the workflow to run up to Step10. The Step11 seems to have some issues even when using hive-action.
2024-04-18 15:40:27 +03:00
Lampros Smyrnaios 6f2ebb2a52 Revert Step8 and Step11 to use Hive again, since their "UPDATE" statements are not supported by Spark. 2024-04-18 15:35:03 +03:00
Antonis Lempesis 0c71c58df6 fixed the definition of gold_oa 2024-04-18 12:01:27 +03:00
Antonis Lempesis 43d05dbebb fixed the definition of result_country 2024-04-18 11:53:50 +03:00
Antonis Lempesis e728a0897c fixed the definition of indi_pub_bronze_oa 2024-04-18 11:07:55 +03:00
Antonis Lempesis 308ae580a9 slight optimization in indi_pub_gold_oa definition 2024-04-18 10:57:52 +03:00
Antonis Lempesis 27d22bd8f9 slight optimization in indi_pub_gold_oa definition 2024-04-17 23:59:52 +03:00
Antonis Lempesis 1f5aba12fa slight optimization in indi_pub_gold_oa definition 2024-04-17 23:54:23 +03:00
11 changed files with 148 additions and 114 deletions

View File

@ -10,7 +10,7 @@ SET harvested='true'
WHERE datasource_tmp.id IN (SELECT DISTINCT d.id WHERE datasource_tmp.id IN (SELECT DISTINCT d.id
FROM ${stats_db_name}.datasource_tmp d, FROM ${stats_db_name}.datasource_tmp d,
${stats_db_name}.result_datasources rd ${stats_db_name}.result_datasources rd
WHERE d.id = rd.datasource); /*EOS*/ WHERE d.id = rd.datasource); -- /*EOS*/
-- Project temporary table update and final project table creation with final updates that can not be applied to ORC tables -- Project temporary table update and final project table creation with final updates that can not be applied to ORC tables
UPDATE ${stats_db_name}.project_tmp UPDATE ${stats_db_name}.project_tmp
@ -19,9 +19,9 @@ WHERE project_tmp.id IN (SELECT pr.id
FROM ${stats_db_name}.project_results pr, FROM ${stats_db_name}.project_results pr,
${stats_db_name}.result r ${stats_db_name}.result r
WHERE pr.result = r.id WHERE pr.result = r.id
AND r.type = 'publication'); /*EOS*/ AND r.type = 'publication'); -- /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.project purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.project purge; -- /*EOS*/
CREATE TABLE ${stats_db_name}.project stored as parquet as CREATE TABLE ${stats_db_name}.project stored as parquet as
SELECT p.id, SELECT p.id,
@ -64,7 +64,7 @@ FROM ${stats_db_name}.project_tmp p
AND r.type = 'publication' AND r.type = 'publication'
AND datediff(to_date(r.date), to_date(pp.enddate)) > 0 AND datediff(to_date(r.date), to_date(pp.enddate)) > 0
GROUP BY pp.id) AS prr2 GROUP BY pp.id) AS prr2
ON prr2.id = p.id; /*EOS*/ ON prr2.id = p.id; -- /*EOS*/
UPDATE ${stats_db_name}.publication_tmp UPDATE ${stats_db_name}.publication_tmp
SET delayed = 'yes' SET delayed = 'yes'
@ -74,7 +74,7 @@ WHERE publication_tmp.id IN (SELECT distinct r.id
${stats_db_name}.project_tmp p ${stats_db_name}.project_tmp p
WHERE r.id = pr.result WHERE r.id = pr.result
AND pr.id = p.id AND pr.id = p.id
AND to_date(r.date) - to_date(p.enddate) > 0); /*EOS*/ AND to_date(r.date) - to_date(p.enddate) > 0); -- /*EOS*/
UPDATE ${stats_db_name}.dataset_tmp UPDATE ${stats_db_name}.dataset_tmp
SET delayed = 'yes' SET delayed = 'yes'
@ -84,7 +84,7 @@ WHERE dataset_tmp.id IN (SELECT distinct r.id
${stats_db_name}.project_tmp p ${stats_db_name}.project_tmp p
WHERE r.id = pr.result WHERE r.id = pr.result
AND pr.id = p.id AND pr.id = p.id
AND to_date(r.date) - to_date(p.enddate) > 0); /*EOS*/ AND to_date(r.date) - to_date(p.enddate) > 0); -- /*EOS*/
UPDATE ${stats_db_name}.software_tmp UPDATE ${stats_db_name}.software_tmp
SET delayed = 'yes' SET delayed = 'yes'
@ -94,7 +94,7 @@ WHERE software_tmp.id IN (SELECT distinct r.id
${stats_db_name}.project_tmp p ${stats_db_name}.project_tmp p
WHERE r.id = pr.result WHERE r.id = pr.result
AND pr.id = p.id AND pr.id = p.id
AND to_date(r.date) - to_date(p.enddate) > 0); /*EOS*/ AND to_date(r.date) - to_date(p.enddate) > 0); -- /*EOS*/
UPDATE ${stats_db_name}.otherresearchproduct_tmp UPDATE ${stats_db_name}.otherresearchproduct_tmp
SET delayed = 'yes' SET delayed = 'yes'
@ -104,7 +104,7 @@ WHERE otherresearchproduct_tmp.id IN (SELECT distinct r.id
${stats_db_name}.project_tmp p ${stats_db_name}.project_tmp p
WHERE r.id = pr.result WHERE r.id = pr.result
AND pr.id = p.id AND pr.id = p.id
AND to_date(r.date) - to_date(p.enddate) > 0); /*EOS*/ AND to_date(r.date) - to_date(p.enddate) > 0); -- /*EOS*/
CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS
SELECT result_projects.id AS result, SELECT result_projects.id AS result,
@ -117,4 +117,4 @@ FROM ${stats_db_name}.result_projects,
${stats_db_name}.project ${stats_db_name}.project
WHERE result_projects.id = result.id WHERE result_projects.id = result.id
AND result.type = 'publication' AND result.type = 'publication'
AND project.id = result_projects.project; /*EOS*/ AND project.id = result_projects.project; -- /*EOS*/

View File

@ -242,12 +242,14 @@ create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet a
select id, issn_online as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_online) foo select id, issn_online as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_online) foo
) )
SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
FROM ${stats_db_name}.publication_datasources pd FROM ${stats_db_name}.publication pd
left outer join ( left outer join (
select pd.id, 1 as is_gold select pd.id, 1 as is_gold
FROM ${stats_db_name}.publication_datasources pd FROM ${stats_db_name}.publication_datasources pd
join dd on dd.id=pd.datasource left semi join dd on dd.id=pd.datasource
left outer join ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') tmp on tmp.id=pd.id; /*EOS*/ union all
select ra.id, 1 as is_gold
from ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') tmp on tmp.id=pd.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as parquet as create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as parquet as
@ -282,14 +284,17 @@ create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as
drop table if exists ${stats_db_name}.indi_pub_hybrid purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_hybrid purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
select distinct pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication pd select distinct p.id, coalesce(is_hybrid, 0) is_hybrid
from ${stats_db_name}.publication p
left outer join ( left outer join (
select pd.id, 1 as is_hybrid from ${stats_db_name}.publication pd select p.id, 1 as is_hybrid
join ${stats_db_name}.result_instance ri on ri.id=pd.id from ${stats_db_name}.publication p
join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id join ${stats_db_name}.result_instance ri on ri.id=p.id
join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
join ${stats_db_name}.datasource d on d.id=ri.hostedby join ${stats_db_name}.datasource d on d.id=ri.hostedby
where indi_gold.is_gold=0 and ((d.type like '%Journal%' and ri.accessright!='Closed Access' and ri.accessright!='Restricted' and ri.license is not null) or ra.accessroute='hybrid')) tmp on pd.id=tmp.id; /*EOS*/ join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=p.id
left outer join ${stats_db_name}.result_accessroute ra on ra.id=p.id
where indi_gold.is_gold=0 and
((d.type like '%Journal%' and ri.accessright not in ('Closed Access', 'Restricted', 'Not Available') and ri.license is not null) or ra.accessroute='hybrid')) tmp on pd.i=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_org_fairness purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_org_fairness purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet as create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet as
@ -661,17 +666,18 @@ drop view pub_fos_totals; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_bronze_oa purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_bronze_oa purge; /*EOS*/
create table ${stats_db_name}.indi_pub_bronze_oa stored as parquet as create table ${stats_db_name}.indi_pub_bronze_oa stored as parquet as
select distinct pd.id,coalesce(is_bronze_oa,0) is_bronze_oa from ${stats_db_name}.publication pd select distinct p.id,coalesce(is_bronze_oa,0) is_bronze_oa
left outer join (select pd.id, 1 as is_bronze_oa from ${stats_db_name}.publication pd from ${stats_db_name}.publication p
join ${stats_db_name}.result_instance ri on ri.id=pd.id left outer join (
join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id select p.id, 1 as is_bronze_oa
join ${stats_db_name}.indi_pub_hybrid indi_hybrid on indi_hybrid.id=pd.id from ${stats_db_name}.publication p
join ${stats_db_name}.result_accessroute ra on ra.id=pd.id join ${stats_db_name}.result_instance ri on ri.id=p.id
join ${stats_db_name}.datasource d on d.id=ri.hostedby join ${stats_db_name}.datasource d on d.id=ri.hostedby
where indi_gold.is_gold=0 and indi_hybrid.is_hybrid=0 join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=p.id
and ((d.type like '%Journal%' and ri.accessright!='Closed Access' join ${stats_db_name}.indi_pub_hybrid indi_hybrid on indi_hybrid.id=p.id
and ri.accessright!='Restricted' and ri.license is null) or ra.accessroute='bronze')) tmp left outer join ${stats_db_name}.result_accessroute ra on ra.id=p.id
on pd.id=tmp.id; /*EOS*/ where indi_gold.is_gold=0 and indi_hybrid.is_hybrid=0
and ((d.type like '%Journal%' and ri.accessright not in ('Closed Access', 'Restricted', 'Not Available') and ri.license is null) or ra.accessroute='bronze')) tmp on p.id=tmp.id; /*EOS*/
CREATE TEMPORARY VIEW project_year_result_year as CREATE TEMPORARY VIEW project_year_result_year as
select p.id project_id, acronym, r.id result_id, r.year, p.end_year select p.id project_id, acronym, r.id result_id, r.year, p.end_year
@ -1002,13 +1008,18 @@ left outer join (
drop table if exists ${stats_db_name}.result_country purge; /*EOS*/ drop table if exists ${stats_db_name}.result_country purge; /*EOS*/
create table ${stats_db_name}.result_country stored as parquet as create table ${stats_db_name}.result_country stored as parquet as
select distinct ro.id, coalesce(o.country, f.country) select distinct *
from ${stats_db_name}.result_organization ro from (
left outer join ${stats_db_name}.organization o on o.id=ro.organization select ro.id, o.country
left outer join ${stats_db_name}.result_projects rp on rp.id=ro.id from ${stats_db_name}.result_organization ro
left outer join ${stats_db_name}.project p on p.id=rp.project left outer join ${stats_db_name}.organization o on o.id=ro.organization
left outer join ${stats_db_name}.funder f on f.name=p.funder union all
where coalesce(o.country, f.country) IS NOT NULL; select rp.id, f.country
from ${stats_db_name}.result_projects
left outer join ${stats_db_name}.project p on p.id=rp.project
left outer join ${stats_db_name}.funder f on f.name=p.funder
) rc
where rc.country is not null; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_oa_with_license purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_result_oa_with_license purge; /*EOS*/
create table ${stats_db_name}.indi_result_oa_with_license stored as parquet as create table ${stats_db_name}.indi_result_oa_with_license stored as parquet as

View File

@ -44,7 +44,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.publication_classifications purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.publication_classifications purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_classifications STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.publication_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, instancetype.classname as type SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, instancetype.classname as type
from ${openaire_db_name}.publication p from ${openaire_db_name}.publication p
LATERAL VIEW explode(p.instance.instancetype) instances as instancetype LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -52,7 +52,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.publication_concepts purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.publication_concepts purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_concepts STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.publication_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
@ -63,7 +63,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.publication_datasources purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.publication_datasources purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_datasources STORED AS PARQUET as CREATE TABLE ${stats_db_name}.publication_datasources STORED AS PARQUET as
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource SELECT /*+ COALESCE(100) */ p.id, case when d.id is null then 'other' else p.datasource end as datasource
FROM ( FROM (
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource
from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance
@ -76,14 +76,14 @@ FROM (
DROP TABLE IF EXISTS ${stats_db_name}.publication_languages purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.publication_languages purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_languages STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.publication_languages STORED AS PARQUET AS
select substr(p.id, 4) as id, p.language.classname as language select /*+ COALESCE(100) */ substr(p.id, 4) as id, p.language.classname as language
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.publication_oids purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.publication_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_oids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.publication_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.originalid) oids AS ids LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -91,7 +91,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.publication_pids purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.publication_pids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_pids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.publication_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.pid) pids AS ppid LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -99,7 +99,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.publication_topics purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.publication_topics purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_topics STORED AS PARQUET as CREATE TABLE ${stats_db_name}.publication_topics STORED AS PARQUET as
select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic select /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.subject) subjects AS subject LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -107,7 +107,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.publication_citations purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.publication_citations purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_citations STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.publication_citations STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
lateral view explode(p.extrainfo) citations AS citation lateral view explode(p.extrainfo) citations AS citation
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""

View File

@ -45,7 +45,7 @@ WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.dataset_citations purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.dataset_citations purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_citations STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_citations STORED AS PARQUET AS
SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT /*+ COALESCE(100) */ substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.dataset d FROM ${openaire_db_name}.dataset d
LATERAL VIEW explode(d.extrainfo) citations AS citation LATERAL VIEW explode(d.extrainfo) citations AS citation
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
@ -54,7 +54,7 @@ WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !=
DROP TABLE IF EXISTS ${stats_db_name}.dataset_classifications purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.dataset_classifications purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -62,7 +62,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.dataset_concepts purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.dataset_concepts purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
@ -73,7 +73,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.dataset_datasources purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.dataset_datasources purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS
SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource SELECT /*+ COALESCE(100) */ p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
FROM ( FROM (
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
@ -87,14 +87,14 @@ FROM (
DROP TABLE IF EXISTS ${stats_db_name}.dataset_languages purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.dataset_languages purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, p.language.classname AS language SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.dataset_oids purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.dataset_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.originalid) oids AS ids LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -102,7 +102,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.dataset_pids purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.dataset_pids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.pid) pids AS ppid LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -110,7 +110,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.dataset_topics purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.dataset_topics purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.subject) subjects AS subject LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/

View File

@ -44,7 +44,7 @@ where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.software_citations purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.software_citations purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_citations STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_citations STORED AS PARQUET AS
SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT /*+ COALESCE(100) */ substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.software s FROM ${openaire_db_name}.software s
LATERAL VIEW explode(s.extrainfo) citations as citation LATERAL VIEW explode(s.extrainfo) citations as citation
where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
@ -53,7 +53,7 @@ where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !=
DROP TABLE IF EXISTS ${stats_db_name}.software_classifications purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.software_classifications purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -61,7 +61,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.software_concepts purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.software_concepts purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
@ -72,7 +72,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.software_datasources purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.software_datasources purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource SELECT /*+ COALESCE(100) */ p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
FROM ( FROM (
SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
@ -86,14 +86,14 @@ FROM (
DROP TABLE IF EXISTS ${stats_db_name}.software_languages purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.software_languages purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS
select substr(p.id, 4) AS id, p.language.classname AS language select /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_oids purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.software_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.originalid) oids AS ids LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -101,7 +101,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.software_pids purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.software_pids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.pid) pids AS ppid LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
@ -109,7 +109,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.software_topics purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.software_topics purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.subject) subjects AS subject LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/

View File

@ -45,7 +45,7 @@ WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_citations purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_citations purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_citations STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_citations STORED AS PARQUET AS
SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT /*+ COALESCE(100) */ substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; /*EOS*/ and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; /*EOS*/
@ -53,14 +53,14 @@ WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !=
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_classifications purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_classifications purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_concepts purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_concepts purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
@ -70,7 +70,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_datasources purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_datasources purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource SELECT /*+ COALESCE(100) */ p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
@ -81,27 +81,27 @@ FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) A
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_languages purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_languages purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, p.language.classname AS language SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.otherresearchproduct p FROM ${openaire_db_name}.otherresearchproduct p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_oids purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_pids purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_pids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_topics purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_topics purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/

View File

@ -6,14 +6,14 @@
DROP TABLE IF EXISTS ${stats_db_name}.project_oids purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.project_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.project_oids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.project_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.project_organizations purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.project_organizations purge; /*EOS*/
CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization SELECT /*+ COALESCE(100) */ substr(r.source, 4) AS id, substr(r.target, 4) AS organization
from ${openaire_db_name}.relation r from ${openaire_db_name}.relation r
WHERE r.reltype = 'projectOrganization' and r.source like '40|%' WHERE r.reltype = 'projectOrganization' and r.source like '40|%'
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/ and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
@ -21,7 +21,7 @@ WHERE r.reltype = 'projectOrganization' and r.source like '40|%'
DROP TABLE IF EXISTS ${stats_db_name}.project_results purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.project_results purge; /*EOS*/
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance SELECT /*+ COALESCE(100) */ substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
FROM ${openaire_db_name}.relation r FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'resultProject' and r.target like '40|%' WHERE r.reltype = 'resultProject' and r.target like '40|%'
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/ and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
@ -29,7 +29,7 @@ WHERE r.reltype = 'resultProject' and r.target like '40|%'
DROP TABLE IF EXISTS ${stats_db_name}.project_classification purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.project_classification purge; /*EOS*/
create table ${stats_db_name}.project_classification STORED AS PARQUET as create table ${stats_db_name}.project_classification STORED AS PARQUET as
select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3 select /*+ COALESCE(100) */ substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
from ${openaire_db_name}.project p from ${openaire_db_name}.project p
lateral view explode(p.h2020classification) classifs as class lateral view explode(p.h2020classification) classifs as class
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null; /*EOS*/ where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null; /*EOS*/
@ -93,7 +93,7 @@ WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
DROP TABLE IF EXISTS ${stats_db_name}.funder purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.funder purge; /*EOS*/
create table ${stats_db_name}.funder STORED AS PARQUET as create table ${stats_db_name}.funder STORED AS PARQUET as
select distinct xpath_string(fund, '//funder/id') as id, select /*+ COALESCE(100) */ distinct xpath_string(fund, '//funder/id') as id,
xpath_string(fund, '//funder/name') as name, xpath_string(fund, '//funder/name') as name,
xpath_string(fund, '//funder/shortname') as shortname, xpath_string(fund, '//funder/shortname') as shortname,
xpath_string(fundingtree[0].value, '//funder/jurisdiction') as country xpath_string(fundingtree[0].value, '//funder/jurisdiction') as country
@ -102,7 +102,7 @@ from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fun
DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge; /*EOS*/
CREATE TABLE ${stats_db_name}.project_organization_contribution STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.project_organization_contribution STORED AS PARQUET AS
SELECT distinct substr(r.source, 4) AS project, substr(r.target, 4) AS organization, SELECT /*+ COALESCE(100) */ distinct substr(r.source, 4) AS project, substr(r.target, 4) AS organization,
properties[0].value contribution, properties[1].value currency properties[0].value contribution, properties[1].value currency
from ${openaire_db_name}.relation r from ${openaire_db_name}.relation r
LATERAL VIEW explode (r.properties) properties LATERAL VIEW explode (r.properties) properties

View File

@ -130,7 +130,7 @@ with
lvl1 as (select id, topic from ${stats_db_name}.result_topics where topic like '__ %' and type='Fields of Science and Technology classification'), lvl1 as (select id, topic from ${stats_db_name}.result_topics where topic like '__ %' and type='Fields of Science and Technology classification'),
lvl2 as (select id, topic from ${stats_db_name}.result_topics where topic like '____ %' and type='Fields of Science and Technology classification'), lvl2 as (select id, topic from ${stats_db_name}.result_topics where topic like '____ %' and type='Fields of Science and Technology classification'),
lvl3 as (select id, topic from ${stats_db_name}.result_topics where topic like '______ %' and type='Fields of Science and Technology classification') lvl3 as (select id, topic from ${stats_db_name}.result_topics where topic like '______ %' and type='Fields of Science and Technology classification')
select lvl1.id, lvl1.topic as lvl1, lvl2.topic as lvl2, lvl3.topic as lvl3 select /*+ COALESCE(100) */ lvl1.id, lvl1.topic as lvl1, lvl2.topic as lvl2, lvl3.topic as lvl3
from lvl1 from lvl1
join lvl2 on lvl1.id=lvl2.id and substr(lvl2.topic, 1, 2)=substr(lvl1.topic, 1, 2) join lvl2 on lvl1.id=lvl2.id and substr(lvl2.topic, 1, 2)=substr(lvl1.topic, 1, 2)
join lvl3 on lvl3.id=lvl1.id and substr(lvl3.topic, 1, 4)=substr(lvl2.topic, 1, 4); /*EOS*/ join lvl3 on lvl3.id=lvl1.id and substr(lvl3.topic, 1, 4)=substr(lvl2.topic, 1, 4); /*EOS*/
@ -138,7 +138,7 @@ from lvl1
DROP TABLE IF EXISTS ${stats_db_name}.result_organization purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.result_organization purge; /*EOS*/
CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization SELECT /*+ COALESCE(100) */ substr(r.target, 4) AS id, substr(r.source, 4) AS organization
FROM ${openaire_db_name}.relation r FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'resultOrganization' WHERE r.reltype = 'resultOrganization'
and r.target like '50|%' and r.target like '50|%'
@ -147,8 +147,7 @@ WHERE r.reltype = 'resultOrganization'
DROP TABLE IF EXISTS ${stats_db_name}.result_projects purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.result_projects purge; /*EOS*/
CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance select /*+ COALESCE(100) */ pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
FROM ${stats_db_name}.result r FROM ${stats_db_name}.result r
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id; /*EOS*/ JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id; /*EOS*/

View File

@ -5,7 +5,7 @@
-- Datasource table/view and Datasource related tables/views -- Datasource table/view and Datasource related tables/views
------------------------------------------------------------ ------------------------------------------------------------
------------------------------------------------------------ ------------------------------------------------------------
DROP TABLE IF EXISTS ${stats_db_name}.datasource_tmp purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.datasource_tmp purge; -- /*EOS*/
CREATE TABLE ${stats_db_name}.datasource_tmp CREATE TABLE ${stats_db_name}.datasource_tmp
( (
@ -23,6 +23,7 @@ CREATE TABLE ${stats_db_name}.datasource_tmp
issn_printed STRING, issn_printed STRING,
issn_online STRING issn_online STRING
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); /*EOS*/ ) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); /*EOS*/
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); -- /*EOS*/
-- Insert statement that takes into account the piwik_id of the openAIRE graph -- Insert statement that takes into account the piwik_id of the openAIRE graph
INSERT INTO ${stats_db_name}.datasource_tmp INSERT INTO ${stats_db_name}.datasource_tmp
@ -46,16 +47,16 @@ FROM ${openaire_db_name}.datasource d1
LATERAL VIEW EXPLODE(originalid) temp AS originalidd LATERAL VIEW EXPLODE(originalid) temp AS originalidd
WHERE originalidd like "piwik:%") AS d2 WHERE originalidd like "piwik:%") AS d2
ON d1.id = d2.id ON d1.id = d2.id
WHERE d1.datainfo.deletedbyinference = FALSE and d1.datainfo.invisible=false; /*EOS*/ WHERE d1.datainfo.deletedbyinference = FALSE and d1.datainfo.invisible=false; -- /*EOS*/
-- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table. -- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table.
-- Creating a temporary dual table that will be removed after the following insert -- Creating a temporary dual table that will be removed after the following insert
DROP TABLE IF EXISTS ${stats_db_name}.dual purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.dual purge; -- /*EOS*/
CREATE TABLE ${stats_db_name}.dual ( dummy CHAR(1)); /*EOS*/ CREATE TABLE ${stats_db_name}.dual ( dummy CHAR(1)); -- /*EOS*/
INSERT INTO ${stats_db_name}.dual VALUES ('X'); /*EOS*/ INSERT INTO ${stats_db_name}.dual VALUES ('X'); -- /*EOS*/
INSERT INTO ${stats_db_name}.datasource_tmp (`id`, `name`, `type`, `dateofvalidation`, `yearofvalidation`, `harvested`, INSERT INTO ${stats_db_name}.datasource_tmp (`id`, `name`, `type`, `dateofvalidation`, `yearofvalidation`, `harvested`,
`piwik_id`, `latitude`, `longitude`, `websiteurl`, `compatibility`, `issn_printed`, `issn_online`) `piwik_id`, `latitude`, `longitude`, `websiteurl`, `compatibility`, `issn_printed`, `issn_online`)
@ -73,42 +74,42 @@ SELECT 'other',
null, null,
null null
FROM ${stats_db_name}.dual FROM ${stats_db_name}.dual
WHERE 'other' not in (SELECT id FROM ${stats_db_name}.datasource_tmp WHERE name = 'Unknown Repository'); /*EOS*/ WHERE 'other' not in (SELECT id FROM ${stats_db_name}.datasource_tmp WHERE name = 'Unknown Repository'); -- /*EOS*/
DROP TABLE ${stats_db_name}.dual; /*EOS*/ DROP TABLE ${stats_db_name}.dual; -- /*EOS*/
UPDATE ${stats_db_name}.datasource_tmp SET name='Other' WHERE name = 'Unknown Repository'; /*EOS*/ UPDATE ${stats_db_name}.datasource_tmp SET name='Other' WHERE name = 'Unknown Repository'; -- /*EOS*/
UPDATE ${stats_db_name}.datasource_tmp SET yearofvalidation=null WHERE yearofvalidation = '-1'; /*EOS*/ UPDATE ${stats_db_name}.datasource_tmp SET yearofvalidation=null WHERE yearofvalidation = '-1'; -- /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages purge; -- /*EOS*/
CREATE TABLE ${stats_db_name}.datasource_languages STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.datasource_languages STORED AS PARQUET AS
SELECT substr(d.id, 4) AS id, langs.languages AS language SELECT substr(d.id, 4) AS id, langs.languages AS language
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; /*EOS*/ where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; -- /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.datasource_oids purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.datasource_oids purge; -- /*EOS*/
CREATE TABLE ${stats_db_name}.datasource_oids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.datasource_oids STORED AS PARQUET AS
SELECT substr(d.id, 4) AS id, oids.ids AS oid SELECT substr(d.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; /*EOS*/ where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; -- /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.datasource_organizations purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.datasource_organizations purge; -- /*EOS*/
CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
FROM ${openaire_db_name}.relation r FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.source like '20|%' and r.datainfo.invisible=false; /*EOS*/ WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.source like '20|%' and r.datainfo.invisible=false; -- /*EOS*/
-- datasource sources: -- datasource sources:
-- where the datasource info have been collected from. -- where the datasource info have been collected from.
DROP TABLE IF EXISTS ${stats_db_name}.datasource_sources purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.datasource_sources purge; -- /*EOS*/
create table if not exists ${stats_db_name}.datasource_sources STORED AS PARQUET AS create table if not exists ${stats_db_name}.datasource_sources STORED AS PARQUET AS
select substr(d.id, 4) as id, substr(cf.key, 4) as datasource select substr(d.id, 4) as id, substr(cf.key, 4) as datasource
from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf
where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; /*EOS*/ where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; -- /*EOS*/
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
SELECT datasource AS id, id AS result SELECT datasource AS id, id AS result
FROM ${stats_db_name}.result_datasources; /*EOS*/ FROM ${stats_db_name}.result_datasources; -- /*EOS*/

View File

@ -6,7 +6,7 @@
DROP TABLE IF EXISTS ${stats_db_name}.organization purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.organization purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization STORED AS PARQUET AS CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization STORED AS PARQUET AS
SELECT substr(o.id, 4) as id, SELECT /*+ COALESCE(100) */ substr(o.id, 4) as id,
o.legalname.value as name, o.legalname.value as name,
o.legalshortname.value as legalshortname, o.legalshortname.value as legalshortname,
o.country.classid as country o.country.classid as country

View File

@ -303,12 +303,11 @@
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg> <arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg> <arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
</spark> </spark>
<!-- <ok to="Step8"/>--> <ok to="Step8"/>
<ok to="End"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="Step8"> <!-- <action name="Step8">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
@ -316,18 +315,29 @@
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class> <class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
<jar>dhp-stats-update-${projectVersion}.jar</jar> <jar>dhp-stats-update-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} &#45;&#45;conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
${sparkClusterOpts} ${sparkClusterOpts}
${sparkResourceOpts} ${sparkResourceOpts}
${sparkApplicationOpts} ${sparkApplicationOpts}
</spark-opts> </spark-opts>
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg> <arg>&#45;&#45;hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql</arg> <arg>&#45;&#45;sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql</arg>
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg> <arg>&#45;&#45;stats_db_name</arg><arg>${stats_db_name}</arg>
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg> <arg>&#45;&#45;openaire_db_name</arg><arg>${openaire_db_name}</arg>
</spark> </spark>
<ok to="Step9"/> <ok to="Step9"/>
<error to="Kill"/> <error to="Kill"/>
</action>-->
<action name="Step8">
<hive2 xmlns="uri:oozie:hive2-action:0.1">
<jdbc-url>${hive_jdbc_url}</jdbc-url>
<script>scripts/step8.sql</script>
<param>stats_db_name=${stats_db_name}</param>
<param>openaire_db_name=${openaire_db_name}</param>
</hive2>
<ok to="Step9"/>
<error to="Kill"/>
</action> </action>
<action name="Step9"> <action name="Step9">
@ -371,11 +381,12 @@
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg> <arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
<arg>--external_stats_db_name</arg><arg>${external_stats_db_name}</arg> <arg>--external_stats_db_name</arg><arg>${external_stats_db_name}</arg>
</spark> </spark>
<ok to="Step11"/> <!-- <ok to="Step11"/>-->
<ok to="End"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="Step11"> <!-- <action name="Step11">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
@ -383,19 +394,31 @@
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class> <class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
<jar>dhp-stats-update-${projectVersion}.jar</jar> <jar>dhp-stats-update-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} &#45;&#45;conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
${sparkClusterOpts} ${sparkClusterOpts}
${sparkResourceOpts} ${sparkResourceOpts}
${sparkApplicationOpts} ${sparkApplicationOpts}
</spark-opts> </spark-opts>
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg> <arg>&#45;&#45;hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql</arg> <arg>&#45;&#45;sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql</arg>
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg> <arg>&#45;&#45;stats_db_name</arg><arg>${stats_db_name}</arg>
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg> <arg>&#45;&#45;openaire_db_name</arg><arg>${openaire_db_name}</arg>
<arg>--external_stats_db_name</arg><arg>${external_stats_db_name}</arg> <arg>&#45;&#45;external_stats_db_name</arg><arg>${external_stats_db_name}</arg>
</spark> </spark>
<ok to="Step12"/> <ok to="Step12"/>
<error to="Kill"/> <error to="Kill"/>
</action>-->
<action name="Step11">
<hive2 xmlns="uri:oozie:hive2-action:0.1">
<jdbc-url>${hive_jdbc_url}</jdbc-url>
<script>scripts/step11.sql</script>
<param>stats_db_name=${stats_db_name}</param>
<param>openaire_db_name=${openaire_db_name}</param>
<param>external_stats_db_name=${external_stats_db_name}</param>
</hive2>
<ok to="Step12"/>
<error to="Kill"/>
</action> </action>
<action name="Step12"> <action name="Step12">