Compare commits

...

2 Commits

13 changed files with 218 additions and 34 deletions

View File

@ -37,15 +37,22 @@ FROM ${external_stats_db_name}.licenses_normalized; /*EOS*/
-- Re-expose the production usage-statistics relations as views inside the stats
-- database and request statistics for each of them under its own name.
-- NOTE(review): the ANALYZE targets below are views. Spark may reject ANALYZE TABLE
-- on a view that is not cached — confirm these four statements are wanted.
create or replace view ${stats_db_name}.usage_stats as
select * from openaire_prod_usage_stats.usage_stats; /*EOS*/
ANALYZE TABLE ${stats_db_name}.usage_stats COMPUTE STATISTICS; /*EOS*/
create or replace view ${stats_db_name}.downloads_stats as
select * from openaire_prod_usage_stats.downloads_stats; /*EOS*/
ANALYZE TABLE ${stats_db_name}.downloads_stats COMPUTE STATISTICS; /*EOS*/
create or replace view ${stats_db_name}.pageviews_stats as
select * from openaire_prod_usage_stats.pageviews_stats; /*EOS*/
ANALYZE TABLE ${stats_db_name}.pageviews_stats COMPUTE STATISTICS; /*EOS*/
create or replace view ${stats_db_name}.views_stats as
select * from openaire_prod_usage_stats.views_stats; /*EOS*/
ANALYZE TABLE ${stats_db_name}.views_stats COMPUTE STATISTICS; /*EOS*/
------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------
-- Creation date of the database

View File

@ -20,6 +20,8 @@ LEFT OUTER JOIN
from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/
-- Gather table statistics for publication_sources.
ANALYZE TABLE ${stats_db_name}.publication_sources COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.dataset_sources purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources STORED AS PARQUET as
@ -33,6 +35,8 @@ LEFT OUTER JOIN
from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/
-- Gather table statistics for dataset_sources.
ANALYZE TABLE ${stats_db_name}.dataset_sources COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_sources purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources STORED AS PARQUET as
@ -46,6 +50,8 @@ LEFT OUTER JOIN
from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/
-- Gather table statistics for software_sources.
ANALYZE TABLE ${stats_db_name}.software_sources COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_sources purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources STORED AS PARQUET as
@ -59,6 +65,8 @@ LEFT OUTER JOIN
from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/
-- Gather table statistics for otherresearchproduct_sources.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_sources COMPUTE STATISTICS; /*EOS*/
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS
SELECT * FROM ${stats_db_name}.publication_sources
UNION ALL
@ -80,6 +88,8 @@ from (
LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type
WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res; /*EOS*/
-- Gather table statistics for result_orcid.
ANALYZE TABLE ${stats_db_name}.result_orcid COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.result_result purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_result stored as parquet as
@ -95,6 +105,8 @@ where reltype='resultResult'
and r2.resulttype.classname != 'other'
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; /*EOS*/
-- Gather table statistics for result_result.
ANALYZE TABLE ${stats_db_name}.result_result COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.result_citations_oc purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_citations_oc stored as parquet as
@ -112,6 +124,8 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
group by substr(target, 4); /*EOS*/
-- Gather table statistics for result_citations_oc, the table this section creates.
ANALYZE TABLE ${stats_db_name}.result_citations_oc COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.result_references_oc purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_references_oc stored as parquet as
@ -128,3 +142,5 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr
and r2.resulttype.classname != 'other'
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
group by substr(source, 4); /*EOS*/
-- Gather table statistics for result_references_oc.
ANALYZE TABLE ${stats_db_name}.result_references_oc COMPUTE STATISTICS; /*EOS*/

View File

@ -14,6 +14,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type
from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/
-- Gather table statistics for publication_licenses.
ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.dataset_licenses purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses STORED AS PARQUET AS
@ -21,6 +23,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type
from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/
-- Gather table statistics for dataset_licenses.
ANALYZE TABLE ${stats_db_name}.dataset_licenses COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_licenses purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses STORED AS PARQUET AS
@ -28,6 +32,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type
from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/
-- Gather table statistics for software_licenses.
ANALYZE TABLE ${stats_db_name}.software_licenses COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_licenses purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses STORED AS PARQUET AS
@ -35,6 +41,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type
from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/
-- Gather table statistics for otherresearchproduct_licenses.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_licenses COMPUTE STATISTICS; /*EOS*/
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS
SELECT * FROM ${stats_db_name}.publication_licenses
UNION ALL
@ -50,6 +58,8 @@ CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids STORED AS PARQUET
select /*+ COALESCE(100) */ substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid
from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid; /*EOS*/
-- Gather table statistics for organization_pids.
ANALYZE TABLE ${stats_db_name}.organization_pids COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.organization_sources purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources STORED AS PARQUET as
@ -62,9 +72,13 @@ FROM (
from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; /*EOS*/
-- Gather table statistics for organization_sources.
ANALYZE TABLE ${stats_db_name}.organization_sources COMPUTE STATISTICS; /*EOS*/
-- Rebuild result_accessroute: one row per distinct (result id, open-access route).
-- Routes come from exploding instance.accessright.openaccessroute of results that are
-- neither deleted by inference nor invisible. The id drops its first three characters.
DROP TABLE IF EXISTS ${stats_db_name}.result_accessroute purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_accessroute STORED AS PARQUET as
select /*+ COALESCE(100) */ distinct
    substr(id,4) as id,
    accessroute
from ${openaire_db_name}.result
lateral view explode (instance.accessright.openaccessroute) openaccessroute as accessroute
WHERE datainfo.deletedbyinference=false and datainfo.invisible = FALSE; /*EOS*/
-- Gather table statistics for result_accessroute.
ANALYZE TABLE ${stats_db_name}.result_accessroute COMPUTE STATISTICS; /*EOS*/

View File

@ -26,6 +26,8 @@ from (
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null) pr; /*EOS*/
-- Gather table statistics for publication_refereed.
ANALYZE TABLE ${stats_db_name}.publication_refereed COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
with peer_reviewed as (
@ -44,6 +46,8 @@ from (
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null) pr; /*EOS*/
-- Gather table statistics for dataset_refereed.
ANALYZE TABLE ${stats_db_name}.dataset_refereed COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_refereed purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
with peer_reviewed as (
@ -62,6 +66,8 @@ from (
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null) pr; /*EOS*/
-- Gather table statistics for software_refereed.
ANALYZE TABLE ${stats_db_name}.software_refereed COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed purge; /*EOS*/
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
with peer_reviewed as (
@ -80,6 +86,8 @@ from (
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null) pr; /*EOS*/
-- Gather table statistics for otherresearchproduct_refereed.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_refereed COMPUTE STATISTICS; /*EOS*/
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
select * from ${stats_db_name}.publication_refereed
union all
@ -97,6 +105,8 @@ cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.va
from ${openaire_db_name}.result lateral view explode(measures) measures as measures_ids
where measures_ids.id!='views' and measures_ids.id!='downloads'; /*EOS*/
-- Gather table statistics for indi_impact_measures.
ANALYZE TABLE ${stats_db_name}.indi_impact_measures COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.result_apc_affiliations purge; /*EOS*/
create table if not exists ${stats_db_name}.result_apc_affiliations STORED AS PARQUET as
@ -107,3 +117,5 @@ from ${openaire_db_name}.relation rel
join ${openaire_db_name}.organization o on o.id=rel.source
join ${openaire_db_name}.result r on r.id=rel.target
where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0; /*EOS*/
-- Gather table statistics for result_apc_affiliations.
ANALYZE TABLE ${stats_db_name}.result_apc_affiliations COMPUTE STATISTICS; /*EOS*/

View File

@ -6,36 +6,56 @@ set mapred.job.queue.name=analytics; /*EOS*/
-- Rebuild result_projectcount: for every result, the number of distinct projects
-- linked to it through result_projects. The count reads result_projects.project
-- directly, so no join against the project table is needed. Results without any
-- project are kept by the left join and get a count of 0.
DROP TABLE IF EXISTS ${stats_db_name}.result_projectcount purge; /*EOS*/
create table if not exists ${stats_db_name}.result_projectcount STORED AS PARQUET as
select /*+ COALESCE(100) */ r.id, count(distinct rp.project) as count
from ${stats_db_name}.result r
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
group by r.id; /*EOS*/
-- Build result_fundercount: for every result, the number of distinct funders of the
-- projects linked to it. Results without projects get 0 through the left joins.
-- NOTE(review): result_fundercount is dropped and rebuilt from result_funder further
-- down in this script, so this first build looks redundant — confirm it can go.
DROP TABLE IF EXISTS ${stats_db_name}.result_fundercount purge; /*EOS*/
create table if not exists ${stats_db_name}.result_fundercount STORED AS PARQUET as
select /*+ COALESCE(100) */ r.id, count(distinct p.funder) as count
from ${stats_db_name}.result r
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
left outer join ${stats_db_name}.project p on p.id=rp.project
group by r.id; /*EOS*/
-- Gather table statistics for result_projectcount.
ANALYZE TABLE ${stats_db_name}.result_projectcount COMPUTE STATISTICS; /*EOS*/
-- Drop the previous project_resultcount, then build the scratch table project_res with
-- one row per distinct (result id, result type, project id). Projects without results
-- are kept by the left joins with NULL res and type.
-- project_res is dropped first so a copy left behind by an interrupted run is never
-- reused by the "if not exists" create below.
DROP TABLE IF EXISTS ${stats_db_name}.project_resultcount purge; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.project_res purge; /*EOS*/
create table if not exists ${stats_db_name}.project_res stored as parquet as
select distinct r.id as res, r.type, p.id as pid
from ${stats_db_name}.project p
left outer join ${stats_db_name}.result_projects rp on rp.project=p.id
left outer join ${stats_db_name}.result r on r.id=rp.id; /*EOS*/
-- Gather table statistics for project_res.
ANALYZE TABLE ${stats_db_name}.project_res COMPUTE STATISTICS; /*EOS*/
-- Build project_resultcount: per project, the number of linked results of each type.
-- project_res holds one row per distinct (result, type, project), so adding 1 per
-- matching row yields the distinct-result count. Rows with a NULL type (projects
-- without results) contribute 0 to every column.
create table if not exists ${stats_db_name}.project_resultcount STORED AS PARQUET as
select /*+ COALESCE(100) */ pr.pid,
    sum(case when pr.type='publication' then 1 else 0 end) as publications,
    sum(case when pr.type='dataset' then 1 else 0 end) as datasets,
    sum(case when pr.type='software' then 1 else 0 end) as software,
    sum(case when pr.type='other' then 1 else 0 end) as other
from ${stats_db_name}.project_res pr
group by pr.pid; /*EOS*/
-- Gather table statistics for project_resultcount, then discard the project_res
-- scratch table. IF EXISTS keeps a rerun from failing when it is already gone.
ANALYZE TABLE ${stats_db_name}.project_resultcount COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.project_res purge; /*EOS*/
-- Rebuild result_fundercount in two steps.
-- Step 1: result_funder is a scratch table with one row per distinct (result id, funder)
--         reached through result_projects and project.
-- Step 2: result_fundercount counts the non-NULL funders per result. Results without a
--         funder are kept by the left join and get 0.
DROP TABLE IF EXISTS ${stats_db_name}.result_fundercount purge; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.result_funder purge; /*EOS*/
create table if not exists ${stats_db_name}.result_funder stored as parquet as
select distinct rp.id, p.funder
from ${stats_db_name}.result_projects rp
join ${stats_db_name}.project p on p.id=rp.project; /*EOS*/
ANALYZE TABLE ${stats_db_name}.result_funder COMPUTE STATISTICS; /*EOS*/
create table if not exists ${stats_db_name}.result_fundercount STORED AS PARQUET as
select /*+ COALESCE(100) */ r.id, count(rf.funder) as count
from ${stats_db_name}.result r
left outer join ${stats_db_name}.result_funder rf on rf.id=r.id
group by r.id; /*EOS*/
ANALYZE TABLE ${stats_db_name}.result_fundercount COMPUTE STATISTICS; /*EOS*/
-- The scratch table is no longer needed. IF EXISTS keeps a rerun from failing.
DROP TABLE IF EXISTS ${stats_db_name}.result_funder purge; /*EOS*/
-- Re-expose the two stats_ext R&D expenditure relations as views in the stats database.
-- NOTE(review): the first source relation is spelled "rndexpediture" while the view is
-- "rndexpenditure" — confirm the spelling matches the relation in stats_ext.
create or replace view ${stats_db_name}.rndexpenditure as
select * from stats_ext.rndexpediture; /*EOS*/
create or replace view ${stats_db_name}.rndgdpexpenditure as
select * from stats_ext.rndgdpexpenditure; /*EOS*/
@ -55,6 +75,8 @@ from (
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view outer explode(inst.pid) pids as p) r
join ${stats_db_name}.result res on res.id=r.id; /*EOS*/
-- Gather table statistics for result_instance.
ANALYZE TABLE ${stats_db_name}.result_instance COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.result_apc purge; /*EOS*/
create table if not exists ${stats_db_name}.result_apc STORED AS PARQUET as
@ -65,4 +87,6 @@ from (
join ${stats_db_name}.result res on res.id=r.id
where r.amount is not null; /*EOS*/
-- Gather table statistics for result_apc.
ANALYZE TABLE ${stats_db_name}.result_apc COMPUTE STATISTICS; /*EOS*/
-- Re-expose the external issn_gold_oa_dataset relation as a view in the stats database.
create or replace view ${stats_db_name}.issn_gold_oa_dataset as
select * from ${external_stats_db_name}.issn_gold_oa_dataset; /*EOS*/

View File

@ -41,6 +41,7 @@ select /*+ COALESCE(100) */
from ${openaire_db_name}.publication pub
left outer join pub_delayed on pub.id=pub_delayed.pub_id
where pub.datainfo.deletedbyinference = false and pub.datainfo.invisible = false; /*EOS*/
-- Gather table statistics for publication.
ANALYZE TABLE ${stats_db_name}.publication COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.publication_classifications purge; /*EOS*/
@ -50,6 +51,7 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, instancetype.classname as typ
from ${openaire_db_name}.publication p
LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for publication_classifications.
ANALYZE TABLE ${stats_db_name}.publication_classifications COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.publication_concepts purge; /*EOS*/
@ -62,6 +64,8 @@ from ${openaire_db_name}.publication p
LATERAL VIEW explode(p.context) contexts as context
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for publication_concepts.
ANALYZE TABLE ${stats_db_name}.publication_concepts COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.publication_datasources purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_datasources STORED AS PARQUET as
@ -75,6 +79,8 @@ FROM (
from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; /*EOS*/
-- Gather table statistics for publication_datasources.
ANALYZE TABLE ${stats_db_name}.publication_datasources COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.publication_languages purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_languages STORED AS PARQUET AS
@ -82,6 +88,8 @@ select /*+ COALESCE(100) */ substr(p.id, 4) as id, p.language.classname as langu
FROM ${openaire_db_name}.publication p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for publication_languages.
ANALYZE TABLE ${stats_db_name}.publication_languages COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.publication_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_oids STORED AS PARQUET AS
@ -90,6 +98,8 @@ FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for publication_oids.
ANALYZE TABLE ${stats_db_name}.publication_oids COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.publication_pids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_pids STORED AS PARQUET AS
@ -98,6 +108,8 @@ FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for publication_pids.
ANALYZE TABLE ${stats_db_name}.publication_pids COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.publication_topics purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_topics STORED AS PARQUET as
@ -106,6 +118,8 @@ FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for publication_topics.
ANALYZE TABLE ${stats_db_name}.publication_topics COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.publication_citations purge; /*EOS*/
CREATE TABLE ${stats_db_name}.publication_citations STORED AS PARQUET AS
@ -114,3 +128,5 @@ FROM ${openaire_db_name}.publication p
lateral view explode(p.extrainfo) citations AS citation
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for publication_citations.
ANALYZE TABLE ${stats_db_name}.publication_citations COMPUTE STATISTICS; /*EOS*/

View File

@ -42,6 +42,7 @@ from ${openaire_db_name}.dataset datast
left outer join datast_delayed on datast.id=datast_delayed.datast_id
where datast.datainfo.deletedbyinference = false and datast.datainfo.invisible = false; /*EOS*/
-- Gather table statistics for dataset.
ANALYZE TABLE ${stats_db_name}.dataset COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.dataset_citations purge; /*EOS*/
@ -52,6 +53,8 @@ FROM ${openaire_db_name}.dataset d
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for dataset_citations.
ANALYZE TABLE ${stats_db_name}.dataset_citations COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.dataset_classifications purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS
@ -60,6 +63,8 @@ FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for dataset_classifications.
ANALYZE TABLE ${stats_db_name}.dataset_classifications COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.dataset_concepts purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS
@ -71,6 +76,8 @@ from ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.context) contexts as context
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for dataset_concepts.
ANALYZE TABLE ${stats_db_name}.dataset_concepts COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.dataset_datasources purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS
@ -85,6 +92,8 @@ FROM (
FROM ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; /*EOS*/
-- Gather table statistics for dataset_datasources.
ANALYZE TABLE ${stats_db_name}.dataset_datasources COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.dataset_languages purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS
@ -92,6 +101,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS langu
FROM ${openaire_db_name}.dataset p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for dataset_languages.
ANALYZE TABLE ${stats_db_name}.dataset_languages COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.dataset_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS
@ -100,6 +111,8 @@ FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for dataset_oids.
ANALYZE TABLE ${stats_db_name}.dataset_oids COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.dataset_pids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS
@ -108,6 +121,8 @@ FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for dataset_pids.
ANALYZE TABLE ${stats_db_name}.dataset_pids COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.dataset_topics purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS
@ -115,3 +130,5 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.cl
FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for dataset_topics.
ANALYZE TABLE ${stats_db_name}.dataset_topics COMPUTE STATISTICS; /*EOS*/

View File

@ -42,6 +42,7 @@ from ${openaire_db_name}.software soft
left outer join soft_delayed on soft.id=soft_delayed.soft_id
where soft.datainfo.deletedbyinference = false and soft.datainfo.invisible = false; /*EOS*/
-- Gather table statistics for software.
ANALYZE TABLE ${stats_db_name}.software COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_citations purge; /*EOS*/
@ -52,6 +53,8 @@ FROM ${openaire_db_name}.software s
where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for software_citations.
ANALYZE TABLE ${stats_db_name}.software_citations COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_classifications purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS
@ -60,6 +63,8 @@ FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for software_classifications.
ANALYZE TABLE ${stats_db_name}.software_classifications COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_concepts purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS
@ -71,6 +76,8 @@ FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.context) contexts AS context
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for software_concepts.
ANALYZE TABLE ${stats_db_name}.software_concepts COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_datasources purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS
@ -85,6 +92,8 @@ FROM (
FROM ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; /*EOS*/
-- Gather table statistics for software_datasources.
ANALYZE TABLE ${stats_db_name}.software_datasources COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_languages purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS
@ -92,6 +101,8 @@ select /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS langu
FROM ${openaire_db_name}.software p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for software_languages.
ANALYZE TABLE ${stats_db_name}.software_languages COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS
@ -100,6 +111,8 @@ FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for software_oids.
ANALYZE TABLE ${stats_db_name}.software_oids COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_pids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS
@ -108,6 +121,8 @@ FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for software_pids.
ANALYZE TABLE ${stats_db_name}.software_pids COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_topics purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS
@ -115,3 +130,5 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.cl
FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for software_topics.
ANALYZE TABLE ${stats_db_name}.software_topics COMPUTE STATISTICS; /*EOS*/

View File

@ -42,6 +42,7 @@ from ${openaire_db_name}.otherresearchproduct other
left outer join other_delayed on other.id=other_delayed.other_id
where other.datainfo.deletedbyinference = false and other.datainfo.invisible = false; /*EOS*/
-- Gather table statistics for otherresearchproduct.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct COMPUTE STATISTICS; /*EOS*/
-- Otherresearchproduct_citations
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_citations purge; /*EOS*/
@ -52,6 +53,8 @@ FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for otherresearchproduct_citations.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_citations COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_classifications purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS
@ -59,6 +62,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, instancetype.classname AS typ
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for otherresearchproduct_classifications.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_classifications COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_concepts purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS
@ -69,6 +74,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for otherresearchproduct_concepts.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_concepts COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_datasources purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS
@ -80,6 +87,8 @@ FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) A
from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; /*EOS*/
-- Gather table statistics for otherresearchproduct_datasources.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_datasources COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_languages purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS
@ -87,6 +96,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS langu
FROM ${openaire_db_name}.otherresearchproduct p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for otherresearchproduct_languages.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_languages COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS
@ -94,6 +105,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for otherresearchproduct_oids.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_oids COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_pids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS
@ -101,9 +114,13 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS t
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for otherresearchproduct_pids.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_pids COMPUTE STATISTICS; /*EOS*/
-- Rebuild otherresearchproduct_topics: one row per subject of every other research
-- product that is neither deleted by inference nor invisible. Columns are the id
-- without its first three characters, the subject qualifier class name (type) and the
-- subject value (topic).
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_topics purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS
SELECT /*+ COALESCE(100) */
    substr(p.id, 4) AS id,
    subjects.subject.qualifier.classname AS type,
    subjects.subject.value AS topic
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for otherresearchproduct_topics.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_topics COMPUTE STATISTICS; /*EOS*/

View File

@ -12,6 +12,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for project_oids.
ANALYZE TABLE ${stats_db_name}.project_oids COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.project_organizations purge; /*EOS*/
CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
@ -20,6 +22,8 @@ from ${openaire_db_name}.relation r
WHERE r.reltype = 'projectOrganization' and r.source like '40|%'
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for project_organizations.
ANALYZE TABLE ${stats_db_name}.project_organizations COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.project_results purge; /*EOS*/
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
@ -28,6 +32,8 @@ FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'resultProject' and r.target like '40|%'
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
-- Gather table statistics for project_results.
ANALYZE TABLE ${stats_db_name}.project_results COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.project_classification purge; /*EOS*/
create table ${stats_db_name}.project_classification STORED AS PARQUET as
@ -36,6 +42,8 @@ from ${openaire_db_name}.project p
lateral view explode(p.h2020classification) classifs as class
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null; /*EOS*/
-- Gather table statistics for project_classification.
ANALYZE TABLE ${stats_db_name}.project_classification COMPUTE STATISTICS; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.project purge; /*EOS*/
CREATE TABLE ${stats_db_name}.project stored as parquet as
@ -99,6 +107,7 @@ left outer join num_pubs_pr on num_pubs_pr.pr_id = p.id
left outer join num_pub_delayed npd on npd.pr_id=p.id
where p.datainfo.deletedbyinference = false and p.datainfo.invisible = false; /*EOS*/
-- Collect table-level statistics for project (the command must be spelled ANALYZE).
ANALYZE TABLE ${stats_db_name}.project COMPUTE STATISTICS; /*EOS*/
-- Remove any existing funder table.
DROP TABLE IF EXISTS ${stats_db_name}.funder purge; /*EOS*/
@ -109,6 +118,8 @@ select /*+ COALESCE(100) */ distinct xpath_string(fund, '//funder/id') as
xpath_string(fundingtree[0].value, '//funder/jurisdiction') as country
from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund; /*EOS*/
-- Collect table-level statistics for funder (the command must be spelled ANALYZE).
ANALYZE TABLE ${stats_db_name}.funder COMPUTE STATISTICS; /*EOS*/
-- Remove any previous project_organization_contribution table before it is created again below.
DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge; /*EOS*/
CREATE TABLE ${stats_db_name}.project_organization_contribution STORED AS PARQUET AS
@ -118,3 +129,5 @@ from ${openaire_db_name}.relation r
LATERAL VIEW explode (r.properties) properties
where properties[0].key='contribution' and r.reltype = 'projectOrganization' and r.source like '40|%'
and properties[0].value>0.0 and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
-- Collect table-level statistics for project_organization_contribution.
-- Spark SQL / Hive only recognise the ANALYZE spelling of this command.
ANALYZE TABLE ${stats_db_name}.project_organization_contribution COMPUTE STATISTICS; /*EOS*/

View File

@ -131,6 +131,8 @@ DROP TABLE IF EXISTS ${stats_db_name}.result_fos_base_tmp purge; /*EOS*/
-- Temporary helper table: the (id, topic) pairs of result_topics whose type is the
-- Fields of Science and Technology classification.
create table ${stats_db_name}.result_fos_base_tmp stored as parquet as
select /*+ COALESCE(100) */ id, topic
from ${stats_db_name}.result_topics
where type='Fields of Science and Technology classification'; /*EOS*/

-- Collect table-level statistics for the helper table (the command must be spelled ANALYZE).
ANALYZE TABLE ${stats_db_name}.result_fos_base_tmp COMPUTE STATISTICS; /*EOS*/

-- Remove any previous result_fos table before it is created again below.
DROP TABLE IF EXISTS ${stats_db_name}.result_fos purge; /*EOS*/
create table ${stats_db_name}.result_fos stored as parquet as
@ -145,8 +147,9 @@ from lvl1
join lvl3 on lvl3.id=lvl1.id and substr(lvl3.topic, 1, 4)=substr(lvl2.topic, 1, 4)
join lvl4 on lvl4.id=lvl1.id and substr(lvl4.topic, 1, 6)=substr(lvl3.topic, 1, 6); /*EOS*/
-- Collect table-level statistics for result_fos (the command must be spelled ANALYZE).
ANALYZE TABLE ${stats_db_name}.result_fos COMPUTE STATISTICS; /*EOS*/
-- Drop the temporary base table exactly once. A second unconditional DROP of the same
-- table would fail because the table no longer exists.
DROP TABLE ${stats_db_name}.result_fos_base_tmp purge; /*EOS*/
-- Remove any existing result_organization table.
DROP TABLE IF EXISTS ${stats_db_name}.result_organization purge; /*EOS*/
@ -157,10 +160,12 @@ WHERE r.reltype = 'resultOrganization'
and r.target like '50|%'
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
-- Collect table-level statistics for result_organization (the command must be spelled ANALYZE).
ANALYZE TABLE ${stats_db_name}.result_organization COMPUTE STATISTICS; /*EOS*/
-- Remove any previous result_projects table before it is created again below.
DROP TABLE IF EXISTS ${stats_db_name}.result_projects purge; /*EOS*/
-- Builds result_projects from project_results.
-- NOTE(review): two SELECT bodies follow this CREATE TABLE header. The first one (joins result and
-- project, adds daysfromend) completes the CTAS. The second one is then a standalone query whose
-- output is not stored anywhere. This looks like the old and the new body of a change shown
-- together - confirm which one is intended and remove the other.
CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS
select /*+ COALESCE(100) */ pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
FROM ${stats_db_name}.result r
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
JOIN ${stats_db_name}.project p ON p.id = pr.id; /*EOS*/
select /*+ COALESCE(100) */ pr.result AS id, pr.id AS project, pr.provenance
FROM ${stats_db_name}.project_results pr; /*EOS*/
-- Collect table-level statistics for result_projects.
-- Spark SQL / Hive only recognise the ANALYZE spelling of this command.
ANALYZE TABLE ${stats_db_name}.result_projects COMPUTE STATISTICS; /*EOS*/

View File

@ -21,6 +21,20 @@ from ${openaire_db_name}.datasource
lateral view explode(originalid) temp as originalidd
where originalidd like "piwik:%"; /*EOS*/
-- Helper table: distinct hostedby keys found in the instances of every result.
-- (The table name keeps its existing spelling because other statements reference it.)
create table ${stats_db_name}.harested_datasources stored as parquet as
select distinct inst.hostedby.key as d_id
from ${openaire_db_name}.result lateral view outer explode (instance) insts as inst; /*EOS*/

-- Collect table-level statistics (the command must be spelled ANALYZE).
ANALYZE TABLE ${stats_db_name}.harested_datasources COMPUTE STATISTICS; /*EOS*/

-- Helper table: datasource id together with the part after "piwik:" of each matching original id.
-- NOTE(review): the lines just above this block look like the tail of an identical statement -
-- confirm that piwik_datasource is not created twice, since this CREATE has no IF NOT EXISTS.
create table ${stats_db_name}.piwik_datasource stored as parquet as
select id, split(originalidd, '\\:')[1] as piwik_id
from ${openaire_db_name}.datasource
lateral view explode(originalid) temp as originalidd
where originalidd like "piwik:%"; /*EOS*/

-- Collect table-level statistics (the command must be spelled ANALYZE).
ANALYZE TABLE ${stats_db_name}.piwik_datasource COMPUTE STATISTICS; /*EOS*/
CREATE TABLE ${stats_db_name}.datasource stored as parquet as
select /*+ COALESCE(100) */
substr(dtrce.id, 4) as id,
@ -41,6 +55,8 @@ left outer join ${stats_db_name}.harested_datasources res on res.d_id=dtrce.id
left outer join ${stats_db_name}.piwik_datasource piwik_d on piwik_d.id=dtrce.id
where dtrce.datainfo.deletedbyinference = false and dtrce.datainfo.invisible = false; /*EOS*/
-- Collect table-level statistics for datasource (the command must be spelled ANALYZE).
ANALYZE TABLE ${stats_db_name}.datasource COMPUTE STATISTICS; /*EOS*/
-- Drop the two helper tables that the datasource build joined against.
drop table ${stats_db_name}.harested_datasources; /*EOS*/
drop table ${stats_db_name}.piwik_datasource; /*EOS*/
@ -51,6 +67,8 @@ SELECT /*+ COALESCE(100) */ substr(d.id, 4) AS id, langs.languages AS language
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; /*EOS*/
-- Collect table-level statistics for datasource_languages (the command must be spelled ANALYZE).
ANALYZE TABLE ${stats_db_name}.datasource_languages COMPUTE STATISTICS; /*EOS*/
-- Remove any previous datasource_oids table before it is created again below.
DROP TABLE IF EXISTS ${stats_db_name}.datasource_oids purge; /*EOS*/
CREATE TABLE ${stats_db_name}.datasource_oids STORED AS PARQUET AS
@ -58,6 +76,8 @@ SELECT /*+ COALESCE(100) */ substr(d.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; /*EOS*/
-- Collect table-level statistics for datasource_oids (the command must be spelled ANALYZE).
ANALYZE TABLE ${stats_db_name}.datasource_oids COMPUTE STATISTICS; /*EOS*/
-- Remove any previous datasource_organizations table before it is created again below.
DROP TABLE IF EXISTS ${stats_db_name}.datasource_organizations purge; /*EOS*/
CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS
@ -65,6 +85,8 @@ SELECT /*+ COALESCE(100) */ substr(r.target, 4) AS id, substr(r.source, 4) AS or
FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.source like '20|%' and r.datainfo.invisible=false; /*EOS*/
-- Collect table-level statistics for datasource_organizations (the command must be spelled ANALYZE).
ANALYZE TABLE ${stats_db_name}.datasource_organizations COMPUTE STATISTICS; /*EOS*/
-- datasource sources:
-- where the datasource info has been collected from.
DROP TABLE IF EXISTS ${stats_db_name}.datasource_sources purge; /*EOS*/
@ -74,6 +96,8 @@ select /*+ COALESCE(100) */ substr(d.id, 4) as id, substr(cf.key, 4) as datasour
from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf
where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; /*EOS*/
-- Collect table-level statistics for datasource_sources (the command must be spelled ANALYZE).
ANALYZE TABLE ${stats_db_name}.datasource_sources COMPUTE STATISTICS; /*EOS*/

-- View over result_datasources with the two columns swapped:
-- id is the datasource, result is the id of the result.
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
SELECT datasource AS id, id AS result
FROM ${stats_db_name}.result_datasources; /*EOS*/

View File

@ -15,6 +15,8 @@ SELECT /*+ COALESCE(100) */ substr(o.id, 4) as id,
FROM ${openaire_db_name}.organization o
WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible = FALSE; /*EOS*/
-- Collect table-level statistics for organization (the command must be spelled ANALYZE).
ANALYZE TABLE ${stats_db_name}.organization COMPUTE STATISTICS; /*EOS*/

-- View over datasource_organizations with the two columns swapped:
-- id is the organization, datasource is the id of the datasource.
CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS
SELECT organization AS id, id AS datasource
FROM ${stats_db_name}.datasource_organizations; /*EOS*/