dhp-stats-update:
- Set Steps 2-7 and 9 to limit the amount of files generated by Spark, from 8000, down to 100, to improve file-transfer and querying performance. - Allow the workflow to run up to Step10. The Step11 seems to have some issues even when using hive-action.
This commit is contained in:
parent
6f2ebb2a52
commit
d46b78b659
|
@ -44,7 +44,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.publication_classifications purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_classifications STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, instancetype.classname as type
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, instancetype.classname as type
|
||||
from ${openaire_db_name}.publication p
|
||||
LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
@ -52,7 +52,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.publication_concepts purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_concepts STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, case
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
||||
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
||||
|
@ -63,7 +63,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.publication_datasources purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_datasources STORED AS PARQUET as
|
||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||
SELECT /*+ COALESCE(100) */ p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||
FROM (
|
||||
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource
|
||||
from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance
|
||||
|
@ -76,14 +76,14 @@ FROM (
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.publication_languages purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_languages STORED AS PARQUET AS
|
||||
select substr(p.id, 4) as id, p.language.classname as language
|
||||
select /*+ COALESCE(100) */ substr(p.id, 4) as id, p.language.classname as language
|
||||
FROM ${openaire_db_name}.publication p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_oids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_oids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.publication p
|
||||
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
@ -91,7 +91,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.publication_pids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_pids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
|
||||
FROM ${openaire_db_name}.publication p
|
||||
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
@ -99,7 +99,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.publication_topics purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_topics STORED AS PARQUET as
|
||||
select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
|
||||
select /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.publication p
|
||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
@ -107,7 +107,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.publication_citations purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_citations STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.publication p
|
||||
lateral view explode(p.extrainfo) citations AS citation
|
||||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
|
|
|
@ -45,7 +45,7 @@ WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_citations purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_citations STORED AS PARQUET AS
|
||||
SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
SELECT /*+ COALESCE(100) */ substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.dataset d
|
||||
LATERAL VIEW explode(d.extrainfo) citations AS citation
|
||||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
|
@ -54,7 +54,7 @@ WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !=
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_classifications purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, instancetype.classname AS type
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
@ -62,7 +62,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_concepts purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, case
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
||||
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
||||
|
@ -73,7 +73,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_datasources purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS
|
||||
SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
||||
SELECT /*+ COALESCE(100) */ p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
||||
FROM (
|
||||
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
|
@ -87,14 +87,14 @@ FROM (
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_languages purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, p.language.classname AS language
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS language
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_oids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
@ -102,7 +102,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_pids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
@ -110,7 +110,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_topics purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
|
@ -44,7 +44,7 @@ where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.software_citations purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_citations STORED AS PARQUET AS
|
||||
SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
SELECT /*+ COALESCE(100) */ substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.software s
|
||||
LATERAL VIEW explode(s.extrainfo) citations as citation
|
||||
where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
|
@ -53,7 +53,7 @@ where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !=
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.software_classifications purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, instancetype.classname AS type
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
@ -61,7 +61,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.software_concepts purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, case
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
||||
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
||||
|
@ -72,7 +72,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.software_datasources purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS
|
||||
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
|
||||
SELECT /*+ COALESCE(100) */ p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
|
||||
FROM (
|
||||
SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
|
||||
FROM ${openaire_db_name}.software p
|
||||
|
@ -86,14 +86,14 @@ FROM (
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.software_languages purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS
|
||||
select substr(p.id, 4) AS id, p.language.classname AS language
|
||||
select /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS language
|
||||
FROM ${openaire_db_name}.software p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_oids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
@ -101,7 +101,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.software_pids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
@ -109,7 +109,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.software_topics purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
|
@ -45,7 +45,7 @@ WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_citations purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_citations STORED AS PARQUET AS
|
||||
SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
SELECT /*+ COALESCE(100) */ substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation
|
||||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; /*EOS*/
|
||||
|
@ -53,14 +53,14 @@ WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !=
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_classifications purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, instancetype.classname AS type
|
||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_concepts purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, case
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
||||
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
||||
|
@ -70,7 +70,7 @@ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_datasources purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS
|
||||
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
||||
SELECT /*+ COALESCE(100) */ p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
||||
FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
|
||||
from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
|
||||
|
@ -81,27 +81,27 @@ FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) A
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_languages purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, p.language.classname AS language
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS language
|
||||
FROM ${openaire_db_name}.otherresearchproduct p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_oids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_pids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_topics purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
|
@ -6,14 +6,14 @@
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.project_oids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_oids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.project_organizations purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
|
||||
SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization
|
||||
SELECT /*+ COALESCE(100) */ substr(r.source, 4) AS id, substr(r.target, 4) AS organization
|
||||
from ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'projectOrganization' and r.source like '40|%'
|
||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
|
||||
|
@ -21,7 +21,7 @@ WHERE r.reltype = 'projectOrganization' and r.source like '40|%'
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.project_results purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
|
||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
|
||||
SELECT /*+ COALESCE(100) */ substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
|
||||
FROM ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'resultProject' and r.target like '40|%'
|
||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
|
||||
|
@ -29,7 +29,7 @@ WHERE r.reltype = 'resultProject' and r.target like '40|%'
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.project_classification purge; /*EOS*/
|
||||
|
||||
create table ${stats_db_name}.project_classification STORED AS PARQUET as
|
||||
select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
|
||||
select /*+ COALESCE(100) */ substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
|
||||
from ${openaire_db_name}.project p
|
||||
lateral view explode(p.h2020classification) classifs as class
|
||||
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null; /*EOS*/
|
||||
|
@ -93,7 +93,7 @@ WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EO
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.funder purge; /*EOS*/
|
||||
|
||||
create table ${stats_db_name}.funder STORED AS PARQUET as
|
||||
select distinct xpath_string(fund, '//funder/id') as id,
|
||||
select /*+ COALESCE(100) */ distinct xpath_string(fund, '//funder/id') as id,
|
||||
xpath_string(fund, '//funder/name') as name,
|
||||
xpath_string(fund, '//funder/shortname') as shortname,
|
||||
xpath_string(fundingtree[0].value, '//funder/jurisdiction') as country
|
||||
|
@ -102,7 +102,7 @@ from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fun
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_organization_contribution STORED AS PARQUET AS
|
||||
SELECT distinct substr(r.source, 4) AS project, substr(r.target, 4) AS organization,
|
||||
SELECT /*+ COALESCE(100) */ distinct substr(r.source, 4) AS project, substr(r.target, 4) AS organization,
|
||||
properties[0].value contribution, properties[1].value currency
|
||||
from ${openaire_db_name}.relation r
|
||||
LATERAL VIEW explode (r.properties) properties
|
||||
|
|
|
@ -130,7 +130,7 @@ with
|
|||
lvl1 as (select id, topic from ${stats_db_name}.result_topics where topic like '__ %' and type='Fields of Science and Technology classification'),
|
||||
lvl2 as (select id, topic from ${stats_db_name}.result_topics where topic like '____ %' and type='Fields of Science and Technology classification'),
|
||||
lvl3 as (select id, topic from ${stats_db_name}.result_topics where topic like '______ %' and type='Fields of Science and Technology classification')
|
||||
select lvl1.id, lvl1.topic as lvl1, lvl2.topic as lvl2, lvl3.topic as lvl3
|
||||
select /*+ COALESCE(100) */ lvl1.id, lvl1.topic as lvl1, lvl2.topic as lvl2, lvl3.topic as lvl3
|
||||
from lvl1
|
||||
join lvl2 on lvl1.id=lvl2.id and substr(lvl2.topic, 1, 2)=substr(lvl1.topic, 1, 2)
|
||||
join lvl3 on lvl3.id=lvl1.id and substr(lvl3.topic, 1, 4)=substr(lvl2.topic, 1, 4); /*EOS*/
|
||||
|
@ -138,7 +138,7 @@ from lvl1
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.result_organization purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS
|
||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
||||
SELECT /*+ COALESCE(100) */ substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
||||
FROM ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'resultOrganization'
|
||||
and r.target like '50|%'
|
||||
|
@ -147,8 +147,7 @@ WHERE r.reltype = 'resultOrganization'
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.result_projects purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS
|
||||
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
|
||||
select /*+ COALESCE(100) */ pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
|
||||
FROM ${stats_db_name}.result r
|
||||
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
|
||||
JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id; /*EOS*/
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.organization purge; /*EOS*/
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization STORED AS PARQUET AS
|
||||
SELECT substr(o.id, 4) as id,
|
||||
SELECT /*+ COALESCE(100) */ substr(o.id, 4) as id,
|
||||
o.legalname.value as name,
|
||||
o.legalshortname.value as legalshortname,
|
||||
o.country.classid as country
|
||||
|
|
|
@ -303,8 +303,7 @@
|
|||
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||
</spark>
|
||||
<!-- <ok to="Step8"/>-->
|
||||
<ok to="End"/>
|
||||
<ok to="Step8"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
@ -382,7 +381,8 @@
|
|||
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||
<arg>--external_stats_db_name</arg><arg>${external_stats_db_name}</arg>
|
||||
</spark>
|
||||
<ok to="Step11"/>
|
||||
<!-- <ok to="Step11"/>-->
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
|
Loading…
Reference in New Issue