Merge pull request '[stats-wf]fixed the result_result creation for monitor' (#190) from antonis.lempesis/dnet-hadoop:beta into beta

Reviewed-on: #190
This commit is contained in:
Miriam Baglioni 2022-02-03 13:00:56 +01:00
commit 89922156c9
12 changed files with 73 additions and 70 deletions

View File

@ -14,7 +14,7 @@ LEFT OUTER JOIN
( (
SELECT substr(d.id, 4) id SELECT substr(d.id, 4) id
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources as CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources as
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
@ -25,7 +25,7 @@ LEFT OUTER JOIN
( (
SELECT substr(d.id, 4) id SELECT substr(d.id, 4) id
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources as CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources as
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
@ -36,7 +36,7 @@ LEFT OUTER JOIN
( (
SELECT substr(d.id, 4) id SELECT substr(d.id, 4) id
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources as CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources as
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
@ -47,7 +47,7 @@ LEFT OUTER JOIN
( (
SELECT substr(d.id, 4) id SELECT substr(d.id, 4) id
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS
SELECT * FROM ${stats_db_name}.publication_sources SELECT * FROM ${stats_db_name}.publication_sources
@ -76,8 +76,8 @@ join ${openaire_db_name}.result r1 on rel.source=r1.id
join ${openaire_db_name}.result r2 on r2.id=rel.target join ${openaire_db_name}.result r2 on r2.id=rel.target
where reltype='resultResult' where reltype='resultResult'
and r1.resulttype.classname!=r2.resulttype.classname and r1.resulttype.classname!=r2.resulttype.classname
and r1.datainfo.deletedbyinference=false and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE
and r2.datainfo.deletedbyinference=false and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE
and r1.resulttype.classname != 'other' and r1.resulttype.classname != 'other'
and r2.resulttype.classname != 'other' and r2.resulttype.classname != 'other'
and rel.datainfo.deletedbyinference=false; and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE;

View File

@ -8,22 +8,22 @@
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses AS CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses AS
SELECT substr(p.id, 4) as id, licenses.value as type SELECT substr(p.id, 4) as id, licenses.value as type
from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses AS CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses AS
SELECT substr(p.id, 4) as id, licenses.value as type SELECT substr(p.id, 4) as id, licenses.value as type
from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses AS CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses AS
SELECT substr(p.id, 4) as id, licenses.value as type SELECT substr(p.id, 4) as id, licenses.value as type
from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses AS CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses AS
SELECT substr(p.id, 4) as id, licenses.value as type SELECT substr(p.id, 4) as id, licenses.value as type
from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS
SELECT * FROM ${stats_db_name}.publication_licenses SELECT * FROM ${stats_db_name}.publication_licenses
@ -46,7 +46,7 @@ FROM (
LEFT OUTER JOIN ( LEFT OUTER JOIN (
SELECT substr(d.id, 4) id SELECT substr(d.id, 4) id
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false) d on o.datasource = d.id; WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id;
-- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS; -- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS;
-- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS FOR COLUMNS; -- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS FOR COLUMNS;

View File

@ -9,22 +9,22 @@
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed as CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed as
select substr(r.id, 4) as id, inst.refereed.classname as refereed select substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false; where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed as CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed as
select substr(r.id, 4) as id, inst.refereed.classname as refereed select substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false; where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed as CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed as
select substr(r.id, 4) as id, inst.refereed.classname as refereed select substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false; where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed as CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed as
select substr(r.id, 4) as id, inst.refereed.classname as refereed select substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false; where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
select * from ${stats_db_name}.publication_refereed select * from ${stats_db_name}.publication_refereed

View File

@ -38,13 +38,13 @@ SELECT substr(p.id, 4) as id,
case when size(p.description) > 0 then true else false end as abstract, case when size(p.description) > 0 then true else false end as abstract,
'publication' as type 'publication' as type
from ${openaire_db_name}.publication p from ${openaire_db_name}.publication p
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.publication_classifications AS CREATE TABLE ${stats_db_name}.publication_classifications AS
SELECT substr(p.id, 4) as id, instancetype.classname as type SELECT substr(p.id, 4) as id, instancetype.classname as type
from ${openaire_db_name}.publication p from ${openaire_db_name}.publication p
LATERAL VIEW explode(p.instance.instancetype) instances as instancetype LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.publication_concepts AS CREATE TABLE ${stats_db_name}.publication_concepts AS
SELECT substr(p.id, 4) as id, case SELECT substr(p.id, 4) as id, case
@ -53,45 +53,45 @@ SELECT substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
from ${openaire_db_name}.publication p from ${openaire_db_name}.publication p
LATERAL VIEW explode(p.context) contexts as context LATERAL VIEW explode(p.context) contexts as context
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.publication_datasources as CREATE TABLE ${stats_db_name}.publication_datasources as
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
FROM ( FROM (
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource
from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance
where p.datainfo.deletedbyinference = false) p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
LEFT OUTER JOIN ( LEFT OUTER JOIN (
SELECT substr(d.id, 4) id SELECT substr(d.id, 4) id
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id; WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id;
CREATE TABLE ${stats_db_name}.publication_languages AS CREATE TABLE ${stats_db_name}.publication_languages AS
select substr(p.id, 4) as id, p.language.classname as language select substr(p.id, 4) as id, p.language.classname as language
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.publication_oids AS CREATE TABLE ${stats_db_name}.publication_oids AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.originalid) oids AS ids LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.publication_pids AS CREATE TABLE ${stats_db_name}.publication_pids AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.pid) pids AS ppid LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.publication_topics as CREATE TABLE ${stats_db_name}.publication_topics as
select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.subject) subjects AS subject LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.publication_citations AS CREATE TABLE ${stats_db_name}.publication_citations AS
SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
lateral view explode(p.extrainfo) citations AS citation lateral view explode(p.extrainfo) citations AS citation
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and p.datainfo.deletedbyinference = false; and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;

View File

@ -81,7 +81,7 @@ compute stats TARGET.result_sources;
create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_topics; compute stats TARGET.result_topics;
create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source or r.id=orig.target); create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source) or exists (select 1 from TARGET.result r where r.id=orig.target);
compute stats TARGET.result_result; compute stats TARGET.result_result;
-- datasources -- datasources

View File

@ -38,20 +38,20 @@ SELECT substr(d.id, 4) AS id,
CASE WHEN SIZE(d.description) > 0 THEN TRUE ELSE FALSE end AS abstract, CASE WHEN SIZE(d.description) > 0 THEN TRUE ELSE FALSE end AS abstract,
'dataset' AS type 'dataset' AS type
FROM ${openaire_db_name}.dataset d FROM ${openaire_db_name}.dataset d
WHERE d.datainfo.deletedbyinference = FALSE; WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.dataset_citations AS CREATE TABLE ${stats_db_name}.dataset_citations AS
SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.dataset d FROM ${openaire_db_name}.dataset d
LATERAL VIEW explode(d.extrainfo) citations AS citation LATERAL VIEW explode(d.extrainfo) citations AS citation
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and d.datainfo.deletedbyinference = false; and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.dataset_classifications AS CREATE TABLE ${stats_db_name}.dataset_classifications AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type SELECT substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.dataset_concepts AS CREATE TABLE ${stats_db_name}.dataset_concepts AS
SELECT substr(p.id, 4) as id, case SELECT substr(p.id, 4) as id, case
@ -60,7 +60,7 @@ SELECT substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
from ${openaire_db_name}.dataset p from ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.context) contexts as context LATERAL VIEW explode(p.context) contexts as context
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.dataset_datasources AS CREATE TABLE ${stats_db_name}.dataset_datasources AS
SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
@ -68,31 +68,31 @@ FROM (
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.instance) instances AS instance LATERAL VIEW explode(p.instance) instances AS instance
where p.datainfo.deletedbyinference = false) p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
LEFT OUTER JOIN ( LEFT OUTER JOIN (
SELECT substr(d.id, 4) id SELECT substr(d.id, 4) id
FROM ${openaire_db_name}.datasource d FROM ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id; WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id;
CREATE TABLE ${stats_db_name}.dataset_languages AS CREATE TABLE ${stats_db_name}.dataset_languages AS
SELECT substr(p.id, 4) AS id, p.language.classname AS language SELECT substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.dataset_oids AS CREATE TABLE ${stats_db_name}.dataset_oids AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.originalid) oids AS ids LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.dataset_pids AS CREATE TABLE ${stats_db_name}.dataset_pids AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.pid) pids AS ppid LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.dataset_topics AS CREATE TABLE ${stats_db_name}.dataset_topics AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.subject) subjects AS subject LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;

View File

@ -38,20 +38,20 @@ SELECT substr(s.id, 4) as id,
CASE WHEN SIZE(s.description) > 0 THEN TRUE ELSE FALSE END AS abstract, CASE WHEN SIZE(s.description) > 0 THEN TRUE ELSE FALSE END AS abstract,
'software' as type 'software' as type
from ${openaire_db_name}.software s from ${openaire_db_name}.software s
where s.datainfo.deletedbyinference = false; where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.software_citations AS CREATE TABLE ${stats_db_name}.software_citations AS
SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.software s FROM ${openaire_db_name}.software s
LATERAL VIEW explode(s.extrainfo) citations as citation LATERAL VIEW explode(s.extrainfo) citations as citation
where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and s.datainfo.deletedbyinference = false; and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.software_classifications AS CREATE TABLE ${stats_db_name}.software_classifications AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type SELECT substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.software_concepts AS CREATE TABLE ${stats_db_name}.software_concepts AS
SELECT substr(p.id, 4) as id, case SELECT substr(p.id, 4) as id, case
@ -60,7 +60,7 @@ SELECT substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.context) contexts AS context LATERAL VIEW explode(p.context) contexts AS context
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.software_datasources AS CREATE TABLE ${stats_db_name}.software_datasources AS
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
@ -68,31 +68,31 @@ FROM (
SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.instance) instances AS instance LATERAL VIEW explode(p.instance) instances AS instance
where p.datainfo.deletedbyinference = false) p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
LEFT OUTER JOIN ( LEFT OUTER JOIN (
SELECT substr(d.id, 4) id SELECT substr(d.id, 4) id
FROM ${openaire_db_name}.datasource d FROM ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id; WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id;
CREATE TABLE ${stats_db_name}.software_languages AS CREATE TABLE ${stats_db_name}.software_languages AS
select substr(p.id, 4) AS id, p.language.classname AS language select substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.software_oids AS CREATE TABLE ${stats_db_name}.software_oids AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.originalid) oids AS ids LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.software_pids AS CREATE TABLE ${stats_db_name}.software_pids AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.pid) pids AS ppid LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.software_topics AS CREATE TABLE ${stats_db_name}.software_topics AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.subject) subjects AS subject LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;

View File

@ -37,19 +37,19 @@ SELECT substr(o.id, 4) AS id,
CASE WHEN SIZE(o.description) > 0 THEN TRUE ELSE FALSE END AS abstract, CASE WHEN SIZE(o.description) > 0 THEN TRUE ELSE FALSE END AS abstract,
'other' AS type 'other' AS type
FROM ${openaire_db_name}.otherresearchproduct o FROM ${openaire_db_name}.otherresearchproduct o
WHERE o.datainfo.deletedbyinference = FALSE; WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false;
-- Otherresearchproduct_citations -- Otherresearchproduct_citations
CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS
SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and o.datainfo.deletedbyinference = false; and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type SELECT substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS
SELECT substr(p.id, 4) as id, case SELECT substr(p.id, 4) as id, case
@ -57,33 +57,33 @@ SELECT substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance
where p.datainfo.deletedbyinference = false) p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
LEFT OUTER JOIN(SELECT substr(d.id, 4) id LEFT OUTER JOIN(SELECT substr(d.id, 4) id
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id; WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id;
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS
SELECT substr(p.id, 4) AS id, p.language.classname AS language SELECT substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.otherresearchproduct p FROM ${openaire_db_name}.otherresearchproduct p
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;

View File

@ -5,24 +5,26 @@
------------------------------------------------------ ------------------------------------------------------
CREATE TABLE ${stats_db_name}.project_oids AS CREATE TABLE ${stats_db_name}.project_oids AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids; FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.project_organizations AS CREATE TABLE ${stats_db_name}.project_organizations AS
SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization
from ${openaire_db_name}.relation r from ${openaire_db_name}.relation r
WHERE r.reltype = 'projectOrganization' WHERE r.reltype = 'projectOrganization'
and r.datainfo.deletedbyinference = false; and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.project_results AS CREATE TABLE ${stats_db_name}.project_results AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
FROM ${openaire_db_name}.relation r FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'resultProject' WHERE r.reltype = 'resultProject'
and r.datainfo.deletedbyinference = false; and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
create table ${stats_db_name}.project_classification as create table ${stats_db_name}.project_classification as
select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3 select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
from ${openaire_db_name}.project p from ${openaire_db_name}.project p
lateral view explode(p.h2020classification) classifs as class lateral view explode(p.h2020classification) classifs as class
where p.datainfo.deletedbyinference=false and class.h2020programme is not null; where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null;
CREATE TABLE ${stats_db_name}.project_tmp CREATE TABLE ${stats_db_name}.project_tmp
( (
@ -72,7 +74,7 @@ SELECT substr(p.id, 4) AS id,
p.code.value AS code, p.code.value AS code,
p.totalcost AS totalcost p.totalcost AS totalcost
FROM ${openaire_db_name}.project p FROM ${openaire_db_name}.project p
WHERE p.datainfo.deletedbyinference = false; WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
create table ${stats_db_name}.funder as create table ${stats_db_name}.funder as
select distinct xpath_string(fund, '//funder/id') as id, select distinct xpath_string(fund, '//funder/id') as id,

View File

@ -127,7 +127,7 @@ CREATE TABLE ${stats_db_name}.result_organization AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
FROM ${openaire_db_name}.relation r FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'resultOrganization' WHERE r.reltype = 'resultOrganization'
and r.datainfo.deletedbyinference = false; and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.result_projects AS CREATE TABLE ${stats_db_name}.result_projects AS
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance

View File

@ -44,7 +44,7 @@ FROM ${openaire_db_name}.datasource d1
LATERAL VIEW EXPLODE(originalid) temp AS originalidd LATERAL VIEW EXPLODE(originalid) temp AS originalidd
WHERE originalidd like "piwik:%") AS d2 WHERE originalidd like "piwik:%") AS d2
ON d1.id = d2.id ON d1.id = d2.id
WHERE d1.datainfo.deletedbyinference = FALSE; WHERE d1.datainfo.deletedbyinference = FALSE and d1.datainfo.invisible=false;
-- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table. -- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table.
-- Creating a temporary dual table that will be removed after the following insert -- Creating a temporary dual table that will be removed after the following insert
@ -82,24 +82,25 @@ WHERE yearofvalidation = '-1';
CREATE TABLE ${stats_db_name}.datasource_languages AS CREATE TABLE ${stats_db_name}.datasource_languages AS
SELECT substr(d.id, 4) AS id, langs.languages AS language SELECT substr(d.id, 4) AS id, langs.languages AS language
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages; FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.datasource_oids AS CREATE TABLE ${stats_db_name}.datasource_oids AS
SELECT substr(d.id, 4) AS id, oids.ids AS oid SELECT substr(d.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids; FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.datasource_organizations AS CREATE TABLE ${stats_db_name}.datasource_organizations AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
FROM ${openaire_db_name}.relation r FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'datasourceOrganization' WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
and r.datainfo.deletedbyinference = false;
-- datasource sources: -- datasource sources:
-- where the datasource info have been collected from. -- where the datasource info have been collected from.
create table if not exists ${stats_db_name}.datasource_sources AS create table if not exists ${stats_db_name}.datasource_sources AS
select substr(d.id, 4) as id, substr(cf.key, 4) as datasource select substr(d.id, 4) as id, substr(cf.key, 4) as datasource
from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf
where d.datainfo.deletedbyinference = false; where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false;
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
SELECT datasource AS id, id AS result SELECT datasource AS id, id AS result

View File

@ -9,7 +9,7 @@ SELECT substr(o.id, 4) as id,
o.legalshortname.value as legalshortname, o.legalshortname.value as legalshortname,
o.country.classid as country o.country.classid as country
FROM ${openaire_db_name}.organization o FROM ${openaire_db_name}.organization o
WHERE o.datainfo.deletedbyinference = FALSE; WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible = FALSE;
CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS
SELECT organization AS id, id AS datasource SELECT organization AS id, id AS datasource