forked from D-Net/dnet-hadoop
computing stats after every table creation
This commit is contained in:
parent
619aa34a15
commit
2ed9c5504c
|
@ -37,15 +37,22 @@ FROM ${external_stats_db_name}.licenses_normalized; /*EOS*/
|
|||
-- ${stats_db_name}.usage_stats is a view over openaire_prod_usage_stats.usage_stats.
-- No ANALYZE TABLE is issued for it: the command targets tables and is rejected for
-- (uncached) views. NOTE(review): confirm against the engine that runs these scripts.
create or replace view ${stats_db_name}.usage_stats as
select * from openaire_prod_usage_stats.usage_stats; /*EOS*/
|
||||
|
||||
-- ${stats_db_name}.downloads_stats is a view over openaire_prod_usage_stats.downloads_stats.
-- No ANALYZE TABLE is issued for it: the command targets tables and is rejected for
-- (uncached) views. NOTE(review): confirm against the engine that runs these scripts.
create or replace view ${stats_db_name}.downloads_stats as
select * from openaire_prod_usage_stats.downloads_stats; /*EOS*/
|
||||
|
||||
-- ${stats_db_name}.pageviews_stats is a view over openaire_prod_usage_stats.pageviews_stats.
-- No ANALYZE TABLE is issued for it: the command targets tables and is rejected for
-- (uncached) views. NOTE(review): confirm against the engine that runs these scripts.
create or replace view ${stats_db_name}.pageviews_stats as
select * from openaire_prod_usage_stats.pageviews_stats; /*EOS*/
|
||||
|
||||
-- ${stats_db_name}.views_stats is a view over openaire_prod_usage_stats.views_stats.
-- No ANALYZE TABLE is issued for it: the command targets tables and is rejected for
-- (uncached) views. NOTE(review): confirm against the engine that runs these scripts.
create or replace view ${stats_db_name}.views_stats as
select * from openaire_prod_usage_stats.views_stats; /*EOS*/
|
||||
------------------------------------------------------------------------------------------------
|
||||
------------------------------------------------------------------------------------------------
|
||||
-- Creation date of the database
|
||||
|
|
|
@ -20,6 +20,8 @@ LEFT OUTER JOIN
|
|||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.publication_sources.
ANALYZE TABLE ${stats_db_name}.publication_sources COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_sources purge; /*EOS*/
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources STORED AS PARQUET as
|
||||
|
@ -32,7 +34,9 @@ LEFT OUTER JOIN
|
|||
SELECT substr(d.id, 4) id
|
||||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/
|
||||
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.dataset_sources.
ANALYZE TABLE ${stats_db_name}.dataset_sources COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_sources purge; /*EOS*/
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources STORED AS PARQUET as
|
||||
|
@ -45,7 +49,9 @@ LEFT OUTER JOIN
|
|||
SELECT substr(d.id, 4) id
|
||||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/
|
||||
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.software_sources.
ANALYZE TABLE ${stats_db_name}.software_sources COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_sources purge; /*EOS*/
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources STORED AS PARQUET as
|
||||
|
@ -59,6 +65,8 @@ LEFT OUTER JOIN
|
|||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.otherresearchproduct_sources.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_sources COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS
|
||||
SELECT * FROM ${stats_db_name}.publication_sources
|
||||
UNION ALL
|
||||
|
@ -80,6 +88,8 @@ from (
|
|||
LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type
|
||||
WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.result_orcid.
ANALYZE TABLE ${stats_db_name}.result_orcid COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.result_result purge; /*EOS*/
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_result stored as parquet as
|
||||
|
@ -95,6 +105,8 @@ where reltype='resultResult'
|
|||
and r2.resulttype.classname != 'other'
|
||||
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.result_result.
ANALYZE TABLE ${stats_db_name}.result_result COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.result_citations_oc purge; /*EOS*/
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_citations_oc stored as parquet as
|
||||
|
@ -112,6 +124,8 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr
|
|||
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
|
||||
group by substr(target, 4); /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.result_citations_oc, the table
-- created by the statement that ends just above.
ANALYZE TABLE ${stats_db_name}.result_citations_oc COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.result_references_oc purge; /*EOS*/
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_references_oc stored as parquet as
|
||||
|
@ -127,4 +141,6 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr
|
|||
and r1.resulttype.classname != 'other'
|
||||
and r2.resulttype.classname != 'other'
|
||||
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
|
||||
group by substr(source, 4); /*EOS*/
|
||||
group by substr(source, 4); /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.result_references_oc.
ANALYZE TABLE ${stats_db_name}.result_references_oc COMPUTE STATISTICS; /*EOS*/
|
|
@ -14,6 +14,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type
|
|||
from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.publication_licenses.
ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_licenses purge; /*EOS*/
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses STORED AS PARQUET AS
|
||||
|
@ -21,6 +23,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type
|
|||
from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.dataset_licenses.
ANALYZE TABLE ${stats_db_name}.dataset_licenses COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_licenses purge; /*EOS*/
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses STORED AS PARQUET AS
|
||||
|
@ -28,6 +32,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type
|
|||
from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.software_licenses.
ANALYZE TABLE ${stats_db_name}.software_licenses COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_licenses purge; /*EOS*/
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses STORED AS PARQUET AS
|
||||
|
@ -35,6 +41,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type
|
|||
from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.otherresearchproduct_licenses
-- (the name matches the DROP/CREATE statements for this table).
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_licenses COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS
|
||||
SELECT * FROM ${stats_db_name}.publication_licenses
|
||||
UNION ALL
|
||||
|
@ -50,6 +58,8 @@ CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids STORED AS PARQUET
|
|||
select /*+ COALESCE(100) */ substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid
|
||||
from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.organization_pids.
ANALYZE TABLE ${stats_db_name}.organization_pids COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.organization_sources purge; /*EOS*/
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources STORED AS PARQUET as
|
||||
|
@ -62,9 +72,13 @@ FROM (
|
|||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.organization_sources.
ANALYZE TABLE ${stats_db_name}.organization_sources COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.result_accessroute purge; /*EOS*/
|
||||
|
||||
-- One row per distinct (result id with its first three characters removed, access route),
-- taken from the exploded instance.accessright.openaccessroute values of results that are
-- neither deleted by inference nor invisible.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_accessroute STORED AS PARQUET AS
SELECT /*+ COALESCE(100) */ DISTINCT
    substr(id, 4) AS id,
    accessroute
FROM ${openaire_db_name}.result
LATERAL VIEW explode(instance.accessright.openaccessroute) openaccessroute AS accessroute
WHERE datainfo.deletedbyinference = false
  AND datainfo.invisible = FALSE; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.result_accessroute.
ANALYZE TABLE ${stats_db_name}.result_accessroute COMPUTE STATISTICS; /*EOS*/
|
|
@ -26,6 +26,8 @@ from (
|
|||
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
|
||||
where peer_reviewed.id is null) pr; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.publication_refereed.
ANALYZE TABLE ${stats_db_name}.publication_refereed COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed purge; /*EOS*/
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
|
||||
with peer_reviewed as (
|
||||
|
@ -44,6 +46,8 @@ from (
|
|||
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
|
||||
where peer_reviewed.id is null) pr; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.dataset_refereed.
ANALYZE TABLE ${stats_db_name}.dataset_refereed COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_refereed purge; /*EOS*/
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
|
||||
with peer_reviewed as (
|
||||
|
@ -62,6 +66,8 @@ from (
|
|||
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
|
||||
where peer_reviewed.id is null) pr; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.software_refereed.
ANALYZE TABLE ${stats_db_name}.software_refereed COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed purge; /*EOS*/
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
|
||||
with peer_reviewed as (
|
||||
|
@ -80,6 +86,8 @@ from (
|
|||
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
|
||||
where peer_reviewed.id is null) pr; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.otherresearchproduct_refereed.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_refereed COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
|
||||
select * from ${stats_db_name}.publication_refereed
|
||||
union all
|
||||
|
@ -97,6 +105,8 @@ cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.va
|
|||
from ${openaire_db_name}.result lateral view explode(measures) measures as measures_ids
|
||||
where measures_ids.id!='views' and measures_ids.id!='downloads'; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.indi_impact_measures.
ANALYZE TABLE ${stats_db_name}.indi_impact_measures COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.result_apc_affiliations purge; /*EOS*/
|
||||
|
||||
create table if not exists ${stats_db_name}.result_apc_affiliations STORED AS PARQUET as
|
||||
|
@ -107,3 +117,5 @@ from ${openaire_db_name}.relation rel
|
|||
join ${openaire_db_name}.organization o on o.id=rel.source
|
||||
join ${openaire_db_name}.result r on r.id=rel.target
|
||||
where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.result_apc_affiliations.
ANALYZE TABLE ${stats_db_name}.result_apc_affiliations COMPUTE STATISTICS; /*EOS*/
|
|
@ -6,36 +6,56 @@ set mapred.job.queue.name=analytics; /*EOS*/
|
|||
DROP TABLE IF EXISTS ${stats_db_name}.result_projectcount purge; /*EOS*/
|
||||
|
||||
create table if not exists ${stats_db_name}.result_projectcount STORED AS PARQUET as
|
||||
select /*+ COALESCE(100) */ r.id, count(distinct p.id) as count
|
||||
select /*+ COALESCE(100) */ r.id, count(distinct rp.project) as count
|
||||
from ${stats_db_name}.result r
|
||||
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||
left outer join ${stats_db_name}.project p on p.id=rp.project
|
||||
group by r.id; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.result_fundercount purge; /*EOS*/
|
||||
|
||||
create table if not exists ${stats_db_name}.result_fundercount STORED AS PARQUET as
|
||||
select /*+ COALESCE(100) */ r.id, count(distinct p.funder) as count
|
||||
from ${stats_db_name}.result r
|
||||
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||
left outer join ${stats_db_name}.project p on p.id=rp.project
|
||||
group by r.id; /*EOS*/
|
||||
-- Compute table-level statistics for ${stats_db_name}.result_projectcount.
ANALYZE TABLE ${stats_db_name}.result_projectcount COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.project_resultcount purge; /*EOS*/
|
||||
|
||||
-- Helper table with the distinct (result id, result type, project id) combinations.
-- The left joins start from project, so a project without linked results still yields
-- a row whose res and type are NULL.
-- The table is dropped first: with CREATE TABLE IF NOT EXISTS alone, a copy left behind
-- by an interrupted run would be reused as-is instead of being rebuilt.
DROP TABLE IF EXISTS ${stats_db_name}.project_res purge; /*EOS*/

create table if not exists ${stats_db_name}.project_res stored as parquet as
select distinct r.id as res, r.type, p.id as pid
from ${stats_db_name}.project p
left outer join ${stats_db_name}.result_projects rp on rp.project=p.id
left outer join ${stats_db_name}.result r on r.id=rp.id; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.project_res.
ANALYZE TABLE ${stats_db_name}.project_res COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
|
||||
create table if not exists ${stats_db_name}.project_resultcount STORED AS PARQUET as
|
||||
with rcount as (
|
||||
select p.id as pid, count(distinct r.id) as `count`, r.type as type
|
||||
from ${stats_db_name}.project p
|
||||
left outer join ${stats_db_name}.result_projects rp on rp.project=p.id
|
||||
left outer join ${stats_db_name}.result r on r.id=rp.id
|
||||
group by r.type, p.id )
|
||||
select /*+ COALESCE(100) */ rcount.pid, sum(case when rcount.type='publication' then rcount.count else 0 end) as publications,
|
||||
sum(case when rcount.type='dataset' then rcount.count else 0 end) as datasets,
|
||||
sum(case when rcount.type='software' then rcount.count else 0 end) as software,
|
||||
sum(case when rcount.type='other' then rcount.count else 0 end) as other
|
||||
from rcount
|
||||
group by rcount.pid; /*EOS*/
|
||||
-- Counts, per project id, the project_res rows of each result type.
-- project_res is aliased as rp because the CASE expressions reference rp.type;
-- without the alias those column references cannot be resolved.
select rp.pid,
       sum(case when rp.type='publication' then 1 else 0 end) as publications,
       sum(case when rp.type='dataset' then 1 else 0 end) as datasets,
       sum(case when rp.type='software' then 1 else 0 end) as software,
       sum(case when rp.type='other' then 1 else 0 end) as other
from ${stats_db_name}.project_res rp
group by rp.pid; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.project_resultcount.
ANALYZE TABLE ${stats_db_name}.project_resultcount COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
-- Remove the project_res helper table. IF EXISTS keeps a re-run from failing when the
-- table is already gone; purge matches the other DROP statements in these scripts.
DROP TABLE IF EXISTS ${stats_db_name}.project_res purge; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.result_fundercount purge; /*EOS*/
|
||||
drop table if exists ${stats_db_name}.result_funder purge; /*EOS*/
|
||||
|
||||
-- Helper table: the distinct (result id, funder) pairs, obtained by joining each
-- result_projects row to its project.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_funder STORED AS PARQUET AS
SELECT DISTINCT
    rp.id,
    p.funder
FROM ${stats_db_name}.result_projects rp
INNER JOIN ${stats_db_name}.project p
    ON p.id = rp.project; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.result_funder.
ANALYZE TABLE ${stats_db_name}.result_funder COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
-- Number of non-NULL funder values per result. The left join keeps every row of result,
-- so a result without any result_funder row gets a count of 0.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_fundercount STORED AS PARQUET AS
SELECT /*+ COALESCE(100) */
    r.id,
    count(rf.funder) AS count
FROM ${stats_db_name}.result r
LEFT JOIN ${stats_db_name}.result_funder rf
    ON rf.id = r.id
GROUP BY r.id; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.result_fundercount.
ANALYZE TABLE ${stats_db_name}.result_fundercount COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
-- Remove the result_funder helper table. IF EXISTS keeps a re-run from failing when the
-- table is already gone; purge matches the other DROP statements in these scripts.
DROP TABLE IF EXISTS ${stats_db_name}.result_funder purge; /*EOS*/
|
||||
|
||||
create or replace view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture; /*EOS*/
|
||||
create or replace view ${stats_db_name}.rndgdpexpenditure as select * from stats_ext.rndgdpexpenditure; /*EOS*/
|
||||
|
@ -55,6 +75,8 @@ from (
|
|||
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view outer explode(inst.pid) pids as p) r
|
||||
join ${stats_db_name}.result res on res.id=r.id; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.result_instance.
ANALYZE TABLE ${stats_db_name}.result_instance COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.result_apc purge; /*EOS*/
|
||||
|
||||
create table if not exists ${stats_db_name}.result_apc STORED AS PARQUET as
|
||||
|
@ -65,4 +87,6 @@ from (
|
|||
join ${stats_db_name}.result res on res.id=r.id
|
||||
where r.amount is not null; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.result_apc.
ANALYZE TABLE ${stats_db_name}.result_apc COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
create or replace view ${stats_db_name}.issn_gold_oa_dataset as select * from ${external_stats_db_name}.issn_gold_oa_dataset; /*EOS*/
|
|
@ -41,6 +41,7 @@ select /*+ COALESCE(100) */
|
|||
from ${openaire_db_name}.publication pub
|
||||
left outer join pub_delayed on pub.id=pub_delayed.pub_id
|
||||
where pub.datainfo.deletedbyinference = false and pub.datainfo.invisible = false; /*EOS*/
|
||||
-- Compute table-level statistics for ${stats_db_name}.publication.
ANALYZE TABLE ${stats_db_name}.publication COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_classifications purge; /*EOS*/
|
||||
|
@ -50,6 +51,7 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, instancetype.classname as typ
|
|||
from ${openaire_db_name}.publication p
|
||||
LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
-- Compute table-level statistics for ${stats_db_name}.publication_classifications.
ANALYZE TABLE ${stats_db_name}.publication_classifications COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_concepts purge; /*EOS*/
|
||||
|
||||
|
@ -62,6 +64,8 @@ from ${openaire_db_name}.publication p
|
|||
LATERAL VIEW explode(p.context) contexts as context
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.publication_concepts.
ANALYZE TABLE ${stats_db_name}.publication_concepts COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_datasources purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_datasources STORED AS PARQUET as
|
||||
|
@ -75,6 +79,8 @@ FROM (
|
|||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.publication_datasources.
ANALYZE TABLE ${stats_db_name}.publication_datasources COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_languages purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_languages STORED AS PARQUET AS
|
||||
|
@ -82,6 +88,8 @@ select /*+ COALESCE(100) */ substr(p.id, 4) as id, p.language.classname as langu
|
|||
FROM ${openaire_db_name}.publication p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.publication_languages.
ANALYZE TABLE ${stats_db_name}.publication_languages COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_oids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_oids STORED AS PARQUET AS
|
||||
|
@ -90,6 +98,8 @@ FROM ${openaire_db_name}.publication p
|
|||
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.publication_oids.
ANALYZE TABLE ${stats_db_name}.publication_oids COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_pids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_pids STORED AS PARQUET AS
|
||||
|
@ -98,6 +108,8 @@ FROM ${openaire_db_name}.publication p
|
|||
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.publication_pids.
ANALYZE TABLE ${stats_db_name}.publication_pids COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_topics purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_topics STORED AS PARQUET as
|
||||
|
@ -106,6 +118,8 @@ FROM ${openaire_db_name}.publication p
|
|||
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.publication_topics.
ANALYZE TABLE ${stats_db_name}.publication_topics COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_citations purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_citations STORED AS PARQUET AS
|
||||
|
@ -114,3 +128,5 @@ FROM ${openaire_db_name}.publication p
|
|||
lateral view explode(p.extrainfo) citations AS citation
|
||||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.publication_citations.
ANALYZE TABLE ${stats_db_name}.publication_citations COMPUTE STATISTICS; /*EOS*/
|
|
@ -42,6 +42,7 @@ from ${openaire_db_name}.dataset datast
|
|||
left outer join datast_delayed on datast.id=datast_delayed.datast_id
|
||||
where datast.datainfo.deletedbyinference = false and datast.datainfo.invisible = false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.dataset.
ANALYZE TABLE ${stats_db_name}.dataset COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_citations purge; /*EOS*/
|
||||
|
||||
|
@ -52,6 +53,8 @@ FROM ${openaire_db_name}.dataset d
|
|||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.dataset_citations.
ANALYZE TABLE ${stats_db_name}.dataset_citations COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_classifications purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS
|
||||
|
@ -60,6 +63,8 @@ FROM ${openaire_db_name}.dataset p
|
|||
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.dataset_classifications.
ANALYZE TABLE ${stats_db_name}.dataset_classifications COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_concepts purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS
|
||||
|
@ -71,6 +76,8 @@ from ${openaire_db_name}.dataset p
|
|||
LATERAL VIEW explode(p.context) contexts as context
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.dataset_concepts.
ANALYZE TABLE ${stats_db_name}.dataset_concepts COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_datasources purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS
|
||||
|
@ -85,6 +92,8 @@ FROM (
|
|||
FROM ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.dataset_datasources.
ANALYZE TABLE ${stats_db_name}.dataset_datasources COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_languages purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS
|
||||
|
@ -92,6 +101,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS langu
|
|||
FROM ${openaire_db_name}.dataset p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.dataset_languages.
ANALYZE TABLE ${stats_db_name}.dataset_languages COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_oids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS
|
||||
|
@ -100,6 +111,8 @@ FROM ${openaire_db_name}.dataset p
|
|||
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.dataset_oids.
ANALYZE TABLE ${stats_db_name}.dataset_oids COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_pids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS
|
||||
|
@ -108,10 +121,14 @@ FROM ${openaire_db_name}.dataset p
|
|||
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.dataset_pids.
ANALYZE TABLE ${stats_db_name}.dataset_pids COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_topics purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.dataset_topics.
ANALYZE TABLE ${stats_db_name}.dataset_topics COMPUTE STATISTICS; /*EOS*/
|
|
@ -42,6 +42,7 @@ from ${openaire_db_name}.software soft
|
|||
left outer join soft_delayed on soft.id=soft_delayed.soft_id
|
||||
where soft.datainfo.deletedbyinference = false and soft.datainfo.invisible = false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.software.
ANALYZE TABLE ${stats_db_name}.software COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_citations purge; /*EOS*/
|
||||
|
||||
|
@ -52,6 +53,8 @@ FROM ${openaire_db_name}.software s
|
|||
where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.software_citations.
ANALYZE TABLE ${stats_db_name}.software_citations COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_classifications purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS
|
||||
|
@ -60,6 +63,8 @@ FROM ${openaire_db_name}.software p
|
|||
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.software_classifications.
ANALYZE TABLE ${stats_db_name}.software_classifications COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_concepts purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS
|
||||
|
@ -71,6 +76,8 @@ FROM ${openaire_db_name}.software p
|
|||
LATERAL VIEW explode(p.context) contexts AS context
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.software_concepts.
ANALYZE TABLE ${stats_db_name}.software_concepts COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_datasources purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS
|
||||
|
@ -85,6 +92,8 @@ FROM (
|
|||
FROM ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.software_datasources.
ANALYZE TABLE ${stats_db_name}.software_datasources COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_languages purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS
|
||||
|
@ -92,6 +101,8 @@ select /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS langu
|
|||
FROM ${openaire_db_name}.software p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.software_languages.
ANALYZE TABLE ${stats_db_name}.software_languages COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_oids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS
|
||||
|
@ -100,6 +111,8 @@ FROM ${openaire_db_name}.software p
|
|||
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.software_oids.
ANALYZE TABLE ${stats_db_name}.software_oids COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_pids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS
|
||||
|
@ -108,10 +121,14 @@ FROM ${openaire_db_name}.software p
|
|||
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.software_pids.
ANALYZE TABLE ${stats_db_name}.software_pids COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_topics purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS
|
||||
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.software_topics.
ANALYZE TABLE ${stats_db_name}.software_topics COMPUTE STATISTICS; /*EOS*/
|
|
@ -42,6 +42,7 @@ from ${openaire_db_name}.otherresearchproduct other
|
|||
left outer join other_delayed on other.id=other_delayed.other_id
|
||||
where other.datainfo.deletedbyinference = false and other.datainfo.invisible = false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.otherresearchproduct.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
-- Otherresearchproduct_citations
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_citations purge; /*EOS*/
|
||||
|
@ -52,6 +53,8 @@ FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo
|
|||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.otherresearchproduct_citations.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_citations COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_classifications purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS
|
||||
|
@ -59,6 +62,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, instancetype.classname AS typ
|
|||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.otherresearchproduct_classifications.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_classifications COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_concepts purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS
|
||||
|
@ -69,6 +74,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case
|
|||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for ${stats_db_name}.otherresearchproduct_concepts.
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_concepts COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_datasources purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS
|
||||
|
@ -80,6 +87,8 @@ FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) A
|
|||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for otherresearchproduct_datasources.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_datasources COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_languages purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS
|
||||
|
@ -87,6 +96,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS langu
|
|||
FROM ${openaire_db_name}.otherresearchproduct p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for otherresearchproduct_languages.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_languages COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_oids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS
|
||||
|
@ -94,6 +105,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
|
|||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for otherresearchproduct_oids.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_oids COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_pids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS
|
||||
|
@ -101,9 +114,13 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS t
|
|||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for otherresearchproduct_pids.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_pids COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_topics purge; /*EOS*/
|
||||
|
||||
-- otherresearchproduct_topics: one row per element of p.subject.
--   id    : p.id starting from its 4th character
--   type  : the subject's qualifier classname
--   topic : the subject's value
-- Records flagged deletedbyinference or invisible are excluded.
-- The statement is terminated exactly once; a repeated trailing WHERE clause
-- would be sent to the engine as a separate, unparseable statement.
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS
SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for otherresearchproduct_topics.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_topics COMPUTE STATISTICS; /*EOS*/
|
|
@ -12,6 +12,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid
|
|||
FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for project_oids.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.project_oids COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.project_organizations purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
|
||||
|
@ -20,6 +22,8 @@ from ${openaire_db_name}.relation r
|
|||
WHERE r.reltype = 'projectOrganization' and r.source like '40|%'
|
||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for project_organizations.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.project_organizations COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.project_results purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
|
||||
|
@ -28,6 +32,8 @@ FROM ${openaire_db_name}.relation r
|
|||
WHERE r.reltype = 'resultProject' and r.target like '40|%'
|
||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for project_results.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.project_results COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.project_classification purge; /*EOS*/
|
||||
|
||||
create table ${stats_db_name}.project_classification STORED AS PARQUET as
|
||||
|
@ -36,6 +42,8 @@ from ${openaire_db_name}.project p
|
|||
lateral view explode(p.h2020classification) classifs as class
|
||||
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for project_classification.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.project_classification COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.project purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project stored as parquet as
|
||||
|
@ -99,6 +107,7 @@ left outer join num_pubs_pr on num_pubs_pr.pr_id = p.id
|
|||
left outer join num_pub_delayed npd on npd.pr_id=p.id
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible = false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for project.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.project COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.funder purge; /*EOS*/
|
||||
|
||||
|
@ -109,6 +118,8 @@ select /*+ COALESCE(100) */ distinct xpath_string(fund, '//funder/id') as
|
|||
xpath_string(fundingtree[0].value, '//funder/jurisdiction') as country
|
||||
from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for funder.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.funder COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_organization_contribution STORED AS PARQUET AS
|
||||
|
@ -117,4 +128,6 @@ properties[0].value contribution, properties[1].value currency
|
|||
from ${openaire_db_name}.relation r
|
||||
LATERAL VIEW explode (r.properties) properties
|
||||
where properties[0].key='contribution' and r.reltype = 'projectOrganization' and r.source like '40|%'
|
||||
and properties[0].value>0.0 and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
|
||||
and properties[0].value>0.0 and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for project_organization_contribution.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.project_organization_contribution COMPUTE STATISTICS; /*EOS*/
|
|
@ -131,6 +131,8 @@ DROP TABLE IF EXISTS ${stats_db_name}.result_fos_base_tmp purge; /*EOS*/
|
|||
create table ${stats_db_name}.result_fos_base_tmp stored as parquet as
|
||||
select /*+ COALESCE(100) */ id, topic from ${stats_db_name}.result_topics where type='Fields of Science and Technology classification'; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for the intermediate result_fos_base_tmp table.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.result_fos_base_tmp COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.result_fos purge; /*EOS*/
|
||||
|
||||
create table ${stats_db_name}.result_fos stored as parquet as
|
||||
|
@ -145,8 +147,9 @@ from lvl1
|
|||
join lvl3 on lvl3.id=lvl1.id and substr(lvl3.topic, 1, 4)=substr(lvl2.topic, 1, 4)
|
||||
join lvl4 on lvl4.id=lvl1.id and substr(lvl4.topic, 1, 6)=substr(lvl3.topic, 1, 6); /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for result_fos.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.result_fos COMPUTE STATISTICS; /*EOS*/

-- Remove the intermediate result_fos_base_tmp table created earlier in this script.
-- It is dropped exactly once, and IF EXISTS keeps the cleanup from failing if the
-- table is already gone (a second unconditional DROP would raise an error).
DROP TABLE IF EXISTS ${stats_db_name}.result_fos_base_tmp purge; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.result_organization purge; /*EOS*/
|
||||
|
||||
|
@ -157,10 +160,12 @@ WHERE r.reltype = 'resultOrganization'
|
|||
and r.target like '50|%'
|
||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for result_organization.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.result_organization COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.result_projects purge; /*EOS*/
|
||||
|
||||
-- result_projects: built directly from project_results.
--   id         : pr.result
--   project    : pr.id
--   provenance : pr.provenance
-- NOTE(review): only one SELECT may follow the CTAS header; a second SELECT after the
-- terminated statement would run as a stray standalone query. The variant kept here is
-- the one without the joins to result/project (no daysfromend column) — confirm that no
-- downstream consumer still expects daysfromend.
CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS
select /*+ COALESCE(100) */ pr.result AS id, pr.id AS project, pr.provenance
FROM ${stats_db_name}.project_results pr; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for result_projects.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.result_projects COMPUTE STATISTICS; /*EOS*/
|
|
@ -9,13 +9,21 @@ set mapred.job.queue.name=analytics; /*EOS*/
|
|||
------------------------------------------------------------
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.datasource purge; /*EOS*/
|
||||
|
||||
-- Helper table: the distinct hostedby keys (d_id) found on the instances of any result.
-- "lateral view outer" keeps results whose instance array is empty or NULL (d_id is NULL for those).
-- The table name is kept as spelled because later statements in this script reference it.
-- Drop any copy left behind by an interrupted run so the CREATE below cannot fail on re-execution.
DROP TABLE IF EXISTS ${stats_db_name}.harested_datasources purge; /*EOS*/

create table ${stats_db_name}.harested_datasources stored as parquet as
select distinct inst.hostedby.key as d_id
from ${openaire_db_name}.result lateral view outer explode (instance) insts as inst; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for the harested_datasources helper table.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.harested_datasources COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
-- Helper table: for every datasource original id of the form "piwik:<x>", the datasource id
-- and the part after the first colon (piwik_id).
-- Drop any copy left behind by an interrupted run so the CREATE below cannot fail on re-execution.
DROP TABLE IF EXISTS ${stats_db_name}.piwik_datasource purge; /*EOS*/

create table ${stats_db_name}.piwik_datasource stored as parquet as
select id, split(originalidd, '\\:')[1] as piwik_id
from ${openaire_db_name}.datasource
lateral view explode(originalid) temp as originalidd
where originalidd like "piwik:%"; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for the piwik_datasource helper table.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.piwik_datasource COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.datasource stored as parquet as
|
||||
with piwik_datasource as (
|
||||
select id, split(originalidd, '\\:')[1] as piwik_id
|
||||
from ${openaire_db_name}.datasource
|
||||
lateral view explode(originalid) temp as originalidd
|
||||
where originalidd like "piwik:%"
|
||||
)
|
||||
select /*+ COALESCE(100) */
|
||||
substr(dtrce.id, 4) as id,
|
||||
case when dtrce.officialname.value='Unknown Repository' then 'Other' else dtrce.officialname.value end as name,
|
||||
|
@ -31,10 +39,14 @@ select /*+ COALESCE(100) */
|
|||
dtrce.journal.issnprinted as issn_printed,
|
||||
dtrce.journal.issnonline as issn_online
|
||||
from ${openaire_db_name}.datasource dtrce
|
||||
left outer join (select inst.hostedby.key as d_id from ${openaire_db_name}.result lateral view outer explode (instance) insts as inst) res on res.d_id=dtrce.id
|
||||
left outer join piwik_datasource piwik_d on piwik_d.id=dtrce.id
|
||||
left outer join ${stats_db_name}.harested_datasources res on res.d_id=dtrce.id
|
||||
left outer join ${stats_db_name}.piwik_datasource piwik_d on piwik_d.id=dtrce.id
|
||||
where dtrce.datainfo.deletedbyinference = false and dtrce.datainfo.invisible = false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for datasource.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.datasource COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
-- Remove the two helper tables used to build the datasource table.
-- IF EXISTS keeps the cleanup from failing when a table is already gone;
-- purge matches the other DROP statements in this script.
DROP TABLE IF EXISTS ${stats_db_name}.harested_datasources purge; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.piwik_datasource purge; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages purge; /*EOS*/
|
||||
|
||||
|
@ -43,6 +55,8 @@ SELECT /*+ COALESCE(100) */ substr(d.id, 4) AS id, langs.languages AS language
|
|||
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages
|
||||
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for datasource_languages.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.datasource_languages COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.datasource_oids purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.datasource_oids STORED AS PARQUET AS
|
||||
|
@ -50,6 +64,8 @@ SELECT /*+ COALESCE(100) */ substr(d.id, 4) AS id, oids.ids AS oid
|
|||
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids
|
||||
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for datasource_oids.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.datasource_oids COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.datasource_organizations purge; /*EOS*/
|
||||
|
||||
CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS
|
||||
|
@ -57,6 +73,8 @@ SELECT /*+ COALESCE(100) */ substr(r.target, 4) AS id, substr(r.source, 4) AS or
|
|||
FROM ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.source like '20|%' and r.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for datasource_organizations.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.datasource_organizations COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
-- datasource sources:
|
||||
-- where the datasource info has been collected from.
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.datasource_sources purge; /*EOS*/
|
||||
|
@ -66,6 +84,8 @@ select /*+ COALESCE(100) */ substr(d.id, 4) as id, substr(cf.key, 4) as datasour
|
|||
from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf
|
||||
where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for datasource_sources.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.datasource_sources COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
-- datasource_results: result_datasources seen from the datasource side.
-- The source column "datasource" is exposed as id, and the source column "id" as result.
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
SELECT
    rd.datasource AS id,
    rd.id AS result
FROM ${stats_db_name}.result_datasources rd; /*EOS*/
|
||||
|
|
|
@ -15,6 +15,8 @@ SELECT /*+ COALESCE(100) */ substr(o.id, 4) as id,
|
|||
FROM ${openaire_db_name}.organization o
|
||||
WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible = FALSE; /*EOS*/
|
||||
|
||||
-- Compute table-level statistics for organization.
-- The keyword must be spelled ANALYZE; Spark SQL / Hive do not parse "ANALYSE".
ANALYZE TABLE ${stats_db_name}.organization COMPUTE STATISTICS; /*EOS*/
|
||||
|
||||
-- organization_datasources: datasource_organizations seen from the organization side.
-- The source column "organization" is exposed as id, and the source column "id" as datasource.
CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS
SELECT
    dso.organization AS id,
    dso.id AS datasource
FROM ${stats_db_name}.datasource_organizations dso; /*EOS*/
|
||||
|
|
Loading…
Reference in New Issue