[stats wf] Added sprint 3&4 of indicators #166

Merged
claudio.atzori merged 5 commits from antonis.lempesis/dnet-hadoop:beta into beta 2021-11-29 10:40:26 +01:00
2 changed files with 262 additions and 188 deletions

View File

@ -8,7 +8,7 @@ join result_instance ri on ri.id = p.id
join datasource on datasource.id = ri.hostedby join datasource on datasource.id = ri.hostedby
where datasource.type like '%Repository%' where datasource.type like '%Repository%'
and (ri.accessright = 'Open Access' and (ri.accessright = 'Open Access'
or ri.accessright = 'Embargo')) tmp or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
on p.id= tmp.id; on p.id= tmp.id;
create table indi_pub_grey_lit stored as parquet as create table indi_pub_grey_lit stored as parquet as
@ -41,178 +41,178 @@ join datasource on datasource.id = ri.hostedby
where datasource.id like '%doajarticles%') tmp where datasource.id like '%doajarticles%') tmp
on p.id= tmp.id; on p.id= tmp.id;
create table indi_project_pubs_count stored as parquet as --create table indi_project_pubs_count stored as parquet as
select pr.id id, count(p.id) total_pubs from project_results pr --select pr.id id, count(p.id) total_pubs from project_results pr
join publication p on p.id=pr.result --join publication p on p.id=pr.result
group by pr.id; --group by pr.id;
create table indi_project_datasets_count stored as parquet as --create table indi_project_datasets_count stored as parquet as
select pr.id id, count(d.id) total_datasets from project_results pr --select pr.id id, count(d.id) total_datasets from project_results pr
join dataset d on d.id=pr.result --join dataset d on d.id=pr.result
group by pr.id; --group by pr.id;
create table indi_project_software_count stored as parquet as --create table indi_project_software_count stored as parquet as
select pr.id id, count(s.id) total_software from project_results pr --select pr.id id, count(s.id) total_software from project_results pr
join software s on s.id=pr.result --join software s on s.id=pr.result
group by pr.id; --group by pr.id;
create table indi_project_otherresearch_count stored as parquet as --create table indi_project_otherresearch_count stored as parquet as
select pr.id id, count(o.id) total_other from project_results pr --select pr.id id, count(o.id) total_other from project_results pr
join otherresearchproduct o on o.id=pr.result --join otherresearchproduct o on o.id=pr.result
group by pr.id; --group by pr.id;
create table indi_pub_avg_year_country_oa stored as parquet as --create table indi_pub_avg_year_country_oa stored as parquet as
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, --select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA,
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA --round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA
from --from
(SELECT year, country, SUM(CASE --(SELECT year, country, SUM(CASE
WHEN bestlicence='Open Access' THEN 1 --WHEN bestlicence='Open Access' THEN 1
ELSE 0 --ELSE 0
END) AS OpenAccess, SUM(CASE --END) AS OpenAccess, SUM(CASE
WHEN bestlicence<>'Open Access' THEN 1 --WHEN bestlicence<>'Open Access' THEN 1
ELSE 0 --ELSE 0
END) AS NonOpenAccess --END) AS NonOpenAccess
FROM publication p --FROM publication p
join result_organization ro on p.id=ro.id --join result_organization ro on p.id=ro.id
join organization o on o.id=ro.organization --join organization o on o.id=ro.organization
where cast(year as int)>=2003 and cast(year as int)<=2021 --where cast(year as int)>=2003 and cast(year as int)<=2021
group by year, country) tmp; --group by year, country) tmp;
create table indi_dataset_avg_year_country_oa stored as parquet as --create table indi_dataset_avg_year_country_oa stored as parquet as
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, --select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA,
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA --round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA
from --from
(SELECT year, country, SUM(CASE --(SELECT year, country, SUM(CASE
WHEN bestlicence='Open Access' THEN 1 --WHEN bestlicence='Open Access' THEN 1
ELSE 0 --ELSE 0
END) AS OpenAccess, SUM(CASE --END) AS OpenAccess, SUM(CASE
WHEN bestlicence<>'Open Access' THEN 1 --WHEN bestlicence<>'Open Access' THEN 1
ELSE 0 --ELSE 0
END) AS NonOpenAccess --END) AS NonOpenAccess
FROM dataset d --FROM dataset d
join result_organization ro on d.id=ro.id --join result_organization ro on d.id=ro.id
join organization o on o.id=ro.organization --join organization o on o.id=ro.organization
where cast(year as int)>=2003 and cast(year as int)<=2021 --where cast(year as int)>=2003 and cast(year as int)<=2021
group by year, country) tmp; --group by year, country) tmp;
create table indi_software_avg_year_country_oa stored as parquet as --create table indi_software_avg_year_country_oa stored as parquet as
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, --select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA,
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA --round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA
from --from
(SELECT year, country, SUM(CASE -- (SELECT year, country, SUM(CASE
WHEN bestlicence='Open Access' THEN 1 --WHEN bestlicence='Open Access' THEN 1
ELSE 0 -- ELSE 0
END) AS OpenAccess, SUM(CASE --END) AS OpenAccess, SUM(CASE
WHEN bestlicence<>'Open Access' THEN 1 -- WHEN bestlicence<>'Open Access' THEN 1
ELSE 0 -- ELSE 0
END) AS NonOpenAccess -- END) AS NonOpenAccess
FROM software s -- FROM software s
join result_organization ro on s.id=ro.id -- join result_organization ro on s.id=ro.id
join organization o on o.id=ro.organization -- join organization o on o.id=ro.organization
where cast(year as int)>=2003 and cast(year as int)<=2021 -- where cast(year as int)>=2003 and cast(year as int)<=2021
group by year, country) tmp; -- group by year, country) tmp;
create table indi_other_avg_year_country_oa stored as parquet as --create table indi_other_avg_year_country_oa stored as parquet as
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, --select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA,
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA --round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA
from -- from
(SELECT year, country, SUM(CASE -- (SELECT year, country, SUM(CASE
WHEN bestlicence='Open Access' THEN 1 -- WHEN bestlicence='Open Access' THEN 1
ELSE 0 -- ELSE 0
END) AS OpenAccess, SUM(CASE -- END) AS OpenAccess, SUM(CASE
WHEN bestlicence<>'Open Access' THEN 1 -- WHEN bestlicence<>'Open Access' THEN 1
ELSE 0 -- ELSE 0
END) AS NonOpenAccess -- END) AS NonOpenAccess
FROM otherresearchproduct orp -- FROM otherresearchproduct orp
join result_organization ro on orp.id=ro.id -- join result_organization ro on orp.id=ro.id
join organization o on o.id=ro.organization -- join organization o on o.id=ro.organization
where cast(year as int)>=2003 and cast(year as int)<=2021 -- where cast(year as int)>=2003 and cast(year as int)<=2021
group by year, country) tmp; -- group by year, country) tmp;
create table indi_pub_avg_year_context_oa stored as parquet as --create table indi_pub_avg_year_context_oa stored as parquet as
with total as --with total as
(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc --(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc
join context c on pc.concept like concat('%',c.id,'%') --join context c on pc.concept like concat('%',c.id,'%')
join publication p on p.id=pc.id --join publication p on p.id=pc.id
where cast(year as int)>=2003 and cast(year as int)<=2021 --where cast(year as int)>=2003 and cast(year as int)<=2021
group by c.name, year ) --group by c.name, year )
select year, name, round(no_of_pubs/total*100,3) averageofpubs --select year, name, round(no_of_pubs/total*100,3) averageofpubs
from total; --from total;
create table indi_dataset_avg_year_context_oa stored as parquet as --create table indi_dataset_avg_year_context_oa stored as parquet as
with total as --with total as
(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc --(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc
join context c on pc.concept like concat('%',c.id,'%') --join context c on pc.concept like concat('%',c.id,'%')
join dataset p on p.id=pc.id --join dataset p on p.id=pc.id
where cast(year as int)>=2003 and cast(year as int)<=2021 --where cast(year as int)>=2003 and cast(year as int)<=2021
group by c.name, year ) --group by c.name, year )
select year, name, round(no_of_pubs/total*100,3) averageofdataset --select year, name, round(no_of_pubs/total*100,3) averageofdataset
from total; --from total;
create table indi_software_avg_year_context_oa stored as parquet as --create table indi_software_avg_year_context_oa stored as parquet as
with total as --with total as
(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc --(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc
join context c on pc.concept like concat('%',c.id,'%') --join context c on pc.concept like concat('%',c.id,'%')
join software p on p.id=pc.id --join software p on p.id=pc.id
where cast(year as int)>=2003 and cast(year as int)<=2021 --where cast(year as int)>=2003 and cast(year as int)<=2021
group by c.name, year ) --group by c.name, year )
select year, name, round(no_of_pubs/total*100,3) averageofsoftware --select year, name, round(no_of_pubs/total*100,3) averageofsoftware
from total; --from total;
create table indi_other_avg_year_context_oa stored as parquet as --create table indi_other_avg_year_context_oa stored as parquet as
with total as --with total as
(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc --(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc
join context c on pc.concept like concat('%',c.id,'%') --join context c on pc.concept like concat('%',c.id,'%')
join otherresearchproduct p on p.id=pc.id --join otherresearchproduct p on p.id=pc.id
where cast(year as int)>=2003 and cast(year as int)<=2021 --where cast(year as int)>=2003 and cast(year as int)<=2021
group by c.name, year ) --group by c.name, year )
select year, name, round(no_of_pubs/total*100,3) averageofother --select year, name, round(no_of_pubs/total*100,3) averageofother
from total; --from total;
create table indi_other_avg_year_content_oa stored as parquet as --create table indi_other_avg_year_content_oa stored as parquet as
with total as --with total as
(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total --(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total
from otherresearchproduct_datasources pd --from otherresearchproduct_datasources pd
join datasource d on datasource=d.id --join datasource d on datasource=d.id
join otherresearchproduct p on p.id=pd.id --join otherresearchproduct p on p.id=pd.id
where cast(year as int)>=2003 and cast(year as int)<=2021 --where cast(year as int)>=2003 and cast(year as int)<=2021
group by d.type, year) --group by d.type, year)
select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct --select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct
from total; --from total;
create table indi_software_avg_year_content_oa stored as parquet as --create table indi_software_avg_year_content_oa stored as parquet as
with total as --with total as
(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total --(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total
from software_datasources pd --from software_datasources pd
join datasource d on datasource=d.id --join datasource d on datasource=d.id
join software p on p.id=pd.id --join software p on p.id=pd.id
where cast(year as int)>=2003 and cast(year as int)<=2021 --where cast(year as int)>=2003 and cast(year as int)<=2021
group by d.type, year) --group by d.type, year)
select year, type, round(no_of_pubs/total*100,3) averageOfSoftware --select year, type, round(no_of_pubs/total*100,3) averageOfSoftware
from total; --from total;
create table indi_dataset_avg_year_content_oa stored as parquet as --create table indi_dataset_avg_year_content_oa stored as parquet as
with total as --with total as
(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total --(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total
from dataset_datasources pd --from dataset_datasources pd
join datasource d on datasource=d.id --join datasource d on datasource=d.id
join dataset p on p.id=pd.id --join dataset p on p.id=pd.id
where cast(year as int)>=2003 and cast(year as int)<=2021 --where cast(year as int)>=2003 and cast(year as int)<=2021
group by d.type, year) --group by d.type, year)
select year, type, round(no_of_pubs/total*100,3) averageOfDatasets --select year, type, round(no_of_pubs/total*100,3) averageOfDatasets
from total; --from total;
create table indi_pub_avg_year_content_oa stored as parquet as --create table indi_pub_avg_year_content_oa stored as parquet as
with total as --with total as
(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total --(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total
from publication_datasources pd --from publication_datasources pd
join datasource d on datasource=d.id --join datasource d on datasource=d.id
join publication p on p.id=pd.id --join publication p on p.id=pd.id
where cast(year as int)>=2003 and cast(year as int)<=2021 --where cast(year as int)>=2003 and cast(year as int)<=2021
group by d.type, year) --group by d.type, year)
select year, type, round(no_of_pubs/total*100,3) averageOfPubs --select year, type, round(no_of_pubs/total*100,3) averageOfPubs
from total; --from total;
create table indi_pub_has_cc_licence stored as parquet as create table indi_pub_has_cc_licence stored as parquet as
select distinct p.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license select distinct p.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
@ -231,11 +231,40 @@ join publication_licenses as license on license.id = p.id
WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp
on p.id= tmp.id; on p.id= tmp.id;
-- EOSC-TR1.1-02M:
-- ## Indicator: has_cc_license. Creative Commons licensing has become a
-- de facto standard in scholarly communication and is promoted by many initiatives
-- like Plan S. This indicator might only be useful when applied
-- to openly available publications.
--create table indi_pub_has_cc_licence_tr stored as parquet as
--select distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_tr
--from publication p
--left outer join (select p.id, license.type as lic from publication p
--join publication_licenses as license on license.id = p.id
--where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
--on p.id= tmp.id
-- #EOSC-F2-01M_cc Rich metadata for scholarly publications
-- ## Indicator: has_cc_license. Creative Commons licensing has become a
-- de facto standard in scholarly communication and is promoted by many initiatives
-- like Plan S. This indicator might only be useful when applied
-- to openly available publications.
-- Same indicator as EOSC-TR1.1-02M (Najko's instructions)
-- create table indi_pub_has_cc_licence_f stored as parquet as
-- select
-- distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_f
-- from publication p
-- left outer join (select p.id, license.type as lic from publication p
-- join publication_licenses as license on license.id = p.id
-- where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
-- on p.id= tmp.id
create table indi_pub_has_abstract stored as parquet as create table indi_pub_has_abstract stored as parquet as
select distinct publication.id, coalesce(abstract, 1) has_abstract select distinct publication.id, coalesce(abstract, 1) has_abstract
from publication; from publication;
create table indi_with_orcid stored as parquet as create table indi_result_with_orcid stored as parquet as
select distinct r.id, coalesce(has_orcid, 0) as has_orcid select distinct r.id, coalesce(has_orcid, 0) as has_orcid
from result r from result r
left outer join (select id, 1 as has_orcid from result_orcid) tmp left outer join (select id, 1 as has_orcid from result_orcid) tmp
@ -270,13 +299,64 @@ join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id where o1.id<>o2.id
group by o1.id, o2.id, o1.type group by o1.id, o2.id, o1.type
create table indi_result_org_country_collab stored as parquet as create table indi_funder_country_collab stored as parquet as
with tmp as with tmp as (select funder, project, country from organization_projects op
(select o.id as id, o.country , ro.id as result,r.type from organization o join organization o on o.id=op.id
join result_organization ro on o.id=ro.organization join project p on p.id=op.project
join result r on r.id=ro.id where o.country <> 'UNKNOWN') where country <> 'UNKNOWN')
select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations select f1.funder, f1.country, f2.country, count(distinct f1.project) as collaborations
from tmp as o1 from tmp as f1
join tmp as o2 on o1.result=o2.result join tmp as f2 on f1.project=f2.project
where o1.id<>o2.id and o1.country<>o2.country where f1.country<>f2.country
group by o1.id, o1.type,o2.country group by f1.funder, f2.country, f1.country
-- indi_pub_diamond: one row per distinct id in publication_datasources.
-- in_diamond_journal is 1 when the id is linked to a datasource whose printed
-- AND online ISSN both match a stats_ext.plan_s_jn row that is flagged as in
-- DOAJ or as OA and has has_apc = false. Otherwise it is 0.
-- The ISSN match keys sit in the ON clause so the join is never read as an
-- unconstrained product, and the statement is terminated explicitly so it does
-- not run into the next create table.
create table indi_pub_diamond stored as parquet as
select distinct pd.id, coalesce(tmp.in_diamond_journal, 0) as in_diamond_journal
from publication_datasources pd
left outer join (
    select pd.id, 1 as in_diamond_journal
    from publication_datasources pd
    join datasource d on d.id = pd.datasource
    join stats_ext.plan_s_jn ps
        on ps.issn_print = d.issn_printed and ps.issn_online = d.issn_online
    where (ps.journal_is_in_doaj = true or ps.journal_is_oa = true)
        and ps.has_apc = false) tmp
on pd.id = tmp.id;
-- indi_pub_hybrid: one row per distinct id in publication_datasources.
-- is_hybrid is 1 when the id is linked to a datasource whose printed AND online
-- ISSN both match a stats_ext.plan_s_jn row with journal_is_in_doaj = false and
-- journal_is_oa = false. Otherwise it is 0.
-- The ISSN match keys sit in the ON clause, and the statement is terminated
-- explicitly so it does not run into the next create table.
create table indi_pub_hybrid stored as parquet as
select distinct pd.id, coalesce(tmp.is_hybrid, 0) as is_hybrid
from publication_datasources pd
left outer join (
    select pd.id, 1 as is_hybrid
    from publication_datasources pd
    join datasource d on d.id = pd.datasource
    join stats_ext.plan_s_jn ps
        on ps.issn_print = d.issn_printed and ps.issn_online = d.issn_online
    where ps.journal_is_in_doaj = false and ps.journal_is_oa = false) tmp
on pd.id = tmp.id;
-- indi_is_gold_oa: one row per distinct id in publication_datasources.
-- gold_oa is 1 when the id is linked to a datasource whose printed OR online
-- ISSN matches a stats_ext.plan_s_jn row flagged as in DOAJ or as OA.
-- Otherwise it is 0.
-- NOTE(review): this indicator matches on either ISSN, while indi_pub_diamond
-- and indi_pub_hybrid require both to match - confirm this is intended.
-- The redundant parentheses around the query were dropped to match the sibling
-- statements, and the statement is now terminated explicitly.
create table indi_is_gold_oa stored as parquet as
select distinct pd.id, coalesce(tmp.gold_oa, 0) as gold_oa
from publication_datasources pd
left outer join (
    select pd.id, 1 as gold_oa
    from publication_datasources pd
    join datasource d on d.id = pd.datasource
    join stats_ext.plan_s_jn ps
        on (ps.issn_print = d.issn_printed or ps.issn_online = d.issn_online)
    where ps.journal_is_in_doaj is true or ps.journal_is_oa is true) tmp
on pd.id = tmp.id;
-- indi_pub_in_transformative: one row per distinct id in publication.
-- is_transformative is 1 when the id appears in publication_datasources linked
-- to a datasource whose printed AND online ISSN both match a
-- stats_ext.plan_s_jn row with is_transformative_journal = true.
-- Otherwise it is 0.
-- The ISSN match keys sit in the ON clause, and the statement is terminated
-- explicitly so it does not run into the next create table.
create table indi_pub_in_transformative stored as parquet as
select distinct pd.id, coalesce(tmp.is_transformative, 0) as is_transformative
from publication pd
left outer join (
    select pd.id, 1 as is_transformative
    from publication_datasources pd
    join datasource d on d.id = pd.datasource
    join stats_ext.plan_s_jn ps
        on ps.issn_print = d.issn_printed and ps.issn_online = d.issn_online
    where ps.is_transformative_journal = true) tmp
on pd.id = tmp.id;
-- indi_pub_closed_other_open: one row per distinct id in result_instance.
-- pub_closed_other_open is 1 when the id is a publication that has an instance
-- hosted by a datasource whose type contains Journal with accessright
-- Closed Access, while the publication bestlicence is Open Access or
-- Open Source. Otherwise it is 0.
-- The outer side is result_instance, so ids that are not publications also get
-- a row (with value 0).
-- The statement is terminated explicitly so it does not run into whatever
-- follows it in the script.
create table indi_pub_closed_other_open stored as parquet as
select distinct ri.id, coalesce(tmp.pub_closed_other_open, 0) as pub_closed_other_open
from result_instance ri
left outer join (
    select ri.id, 1 as pub_closed_other_open
    from result_instance ri
    join publication p on p.id = ri.id
    join datasource d on ri.hostedby = d.id
    where d.type like '%Journal%'
        and ri.accessright = 'Closed Access'
        and (p.bestlicence = 'Open Access' or p.bestlicence = 'Open Source')) tmp
on tmp.id = ri.id;

View File

@ -105,23 +105,6 @@ create table TARGET.project_results stored as parquet as select id as result, pr
compute stats TARGET.project_results; compute stats TARGET.project_results;
-- indicators -- indicators
create view TARGET.indi_dataset_avg_year_content_oa as select * from SOURCE.indi_dataset_avg_year_content_oa orig;
create view TARGET.indi_dataset_avg_year_context_oa as select * from SOURCE.indi_dataset_avg_year_context_oa orig;
create view TARGET.indi_dataset_avg_year_country_oa as select * from SOURCE.indi_dataset_avg_year_country_oa orig;
create view TARGET.indi_other_avg_year_content_oa as select * from SOURCE.indi_other_avg_year_content_oa orig;
create view TARGET.indi_other_avg_year_context_oa as select * from SOURCE.indi_other_avg_year_context_oa orig;
create view TARGET.indi_other_avg_year_country_oa as select * from SOURCE.indi_other_avg_year_country_oa orig;
create view TARGET.indi_project_datasets_count as select * from SOURCE.indi_project_datasets_count orig;
create view TARGET.indi_project_otherresearch_count as select * from SOURCE.indi_project_otherresearch_count orig;
create view TARGET.indi_project_pubs_count as select * from SOURCE.indi_project_pubs_count orig;
create view TARGET.indi_project_software_count as select * from SOURCE.indi_project_software_count orig;
create view TARGET.indi_pub_avg_year_content_oa as select * from SOURCE.indi_pub_avg_year_content_oa orig;
create view TARGET.indi_pub_avg_year_context_oa as select * from SOURCE.indi_pub_avg_year_context_oa orig;
create view TARGET.indi_pub_avg_year_country_oa as select * from SOURCE.indi_pub_avg_year_country_oa orig;
create table TARGET.indi_pub_green_oa stored as parquet as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.indi_pub_green_oa stored as parquet as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.indi_pub_green_oa; compute stats TARGET.indi_pub_green_oa;
create table TARGET.indi_pub_grey_lit stored as parquet as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.indi_pub_grey_lit stored as parquet as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id);
@ -137,9 +120,20 @@ compute stats TARGET.indi_pub_has_cc_licence;
create table TARGET.indi_pub_has_cc_licence_url stored as parquet as select * from SOURCE.indi_pub_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.indi_pub_has_cc_licence_url stored as parquet as select * from SOURCE.indi_pub_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.indi_pub_has_cc_licence_url; compute stats TARGET.indi_pub_has_cc_licence_url;
create view TARGET.indi_software_avg_year_content_oa as select * from SOURCE.indi_software_avg_year_content_oa orig; create view TARGET.indi_funder_country_collab stored as select * from SOURCE.indi_funder_country_collab;
create view TARGET.indi_software_avg_year_context_oa as select * from SOURCE.indi_software_avg_year_context_oa orig;
create view TARGET.indi_software_avg_year_country_oa as select * from SOURCE.indi_software_avg_year_country_oa orig; create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-- Gather statistics for the indi_result_with_orcid copy created just before this line.
compute stats TARGET.indi_result_with_orcid;
-- Each pair below materialises one indicator table in the TARGET database as
-- parquet, keeping only the rows of the same-named SOURCE table whose id exists
-- in TARGET.result, and then gathers statistics on the new table.
-- NOTE(review): SOURCE.indi_funded_result_with_fundref is assumed to be created
-- elsewhere - its definition is not visible in this excerpt.
create table TARGET.indi_funded_result_with_fundref stored as parquet as select * from SOURCE.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.indi_funded_result_with_fundref;
-- Diamond journal flag per publication id.
create table TARGET.indi_pub_diamond stored as parquet as select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.indi_pub_diamond;
-- Hybrid journal flag per publication id.
create table TARGET.indi_pub_hybrid stored as parquet as select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.indi_pub_hybrid;
-- Transformative journal flag per publication id.
create table TARGET.indi_pub_in_transformative stored as parquet as select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.indi_pub_in_transformative;
-- Closed-in-journal but open-elsewhere flag per result instance id.
create table TARGET.indi_pub_closed_other_open stored as parquet as select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.indi_pub_closed_other_open;
--denorm --denorm
alter table TARGET.result rename to TARGET.res_tmp; alter table TARGET.result rename to TARGET.res_tmp;