1
0
Fork 0

Merge pull request 'restored some collab indicators' (#240) from antonis.lempesis/dnet-hadoop:beta into beta

Reviewed-on: D-Net/dnet-hadoop#240
This commit is contained in:
Claudio Atzori 2022-08-05 15:50:39 +02:00
commit 84598c7535
2 changed files with 37 additions and 35 deletions

View File

@ -118,40 +118,40 @@ compute stats indi_funded_result_with_fundref;
--
-- compute stats indi_result_org_collab;
--
-- create table indi_project_collab_org stored as parquet as
-- select o1.id org1,o2.id org2, count(distinct o1.project) as collaborations
-- from organization_projects as o1
-- join organization_projects as o2 on o1.project=o2.project
-- where o1.id!=o2.id
-- group by o1.id, o2.id;
--
-- compute stats indi_project_collab_org;
--
-- create table indi_project_collab_org_country stored as parquet as
-- with tmp as
-- (select o.id organization, o.country , ro.project as project from organization o
-- join organization_projects ro on o.id=ro.id
-- and o.country <> 'UNKNOWN')
-- select o1.organization org1,o2.country country2, count(distinct o1.project) as collaborations
-- from tmp as o1
-- join tmp as o2 on o1.project=o2.project
-- where o1.organization<>o2.organization and o1.country<>o2.country
-- group by o1.organization, o2.country;
--
-- compute stats indi_project_collab_org_country;
--
-- create table indi_funder_country_collab stored as parquet as
-- with tmp as (select funder, project, country from organization_projects op
-- join organization o on o.id=op.id
-- join project p on p.id=op.project
-- where country <> 'UNKNOWN')
-- select f1.funder, f1.country as country1, f2.country as country2, count(distinct f1.project) as collaborations
-- from tmp as f1
-- join tmp as f2 on f1.project=f2.project
-- where f1.country<>f2.country
-- group by f1.funder, f2.country, f1.country;
--
-- compute stats indi_funder_country_collab;
-- Project-based collaborations between organizations: one row per ordered
-- pair of distinct organizations (org1, org2) that share at least one
-- project, with the number of distinct shared projects.
-- The self-join is symmetric, so both (a, b) and (b, a) are produced.
create table indi_project_collab_org stored as parquet as
select
    left_op.id as org1,
    right_op.id as org2,
    count(distinct left_op.project) as collaborations
from organization_projects as left_op
inner join organization_projects as right_op
    on left_op.project = right_op.project
where left_op.id <> right_op.id
group by
    left_op.id,
    right_op.id;

compute stats indi_project_collab_org;
-- Cross-country project collaborations per organization: for every
-- organization (org1), the number of distinct projects it shares with a
-- different organization whose country (country2) differs from its own.
create table indi_project_collab_org_country stored as parquet as
with org_project as (
    -- organizations whose country is not 'UNKNOWN', one row per project they take part in
    select
        o.id as organization,
        o.country,
        ro.project as project
    from organization o
    inner join organization_projects ro
        on o.id = ro.id
    where o.country <> 'UNKNOWN'
)
select
    lhs.organization as org1,
    rhs.country as country2,
    count(distinct lhs.project) as collaborations
from org_project as lhs
inner join org_project as rhs
    on lhs.project = rhs.project
where lhs.organization <> rhs.organization
  and lhs.country <> rhs.country
group by
    lhs.organization,
    rhs.country;

compute stats indi_project_collab_org_country;
-- Funder-level country collaborations: for each funder and each ordered pair
-- of different countries (country1, country2), the number of distinct
-- projects in which both countries appear.
create table indi_funder_country_collab stored as parquet as
with funder_project_country as (
    -- NOTE(review): funder, project and country are unqualified here;
    -- presumably project.funder, organization_projects.project and
    -- organization.country -- confirm against the table schemas.
    select
        funder,
        project,
        country
    from organization_projects op
    inner join organization o
        on o.id = op.id
    inner join project p
        on p.id = op.project
    where country <> 'UNKNOWN'
)
select
    lhs.funder,
    lhs.country as country1,
    rhs.country as country2,
    count(distinct lhs.project) as collaborations
from funder_project_country as lhs
inner join funder_project_country as rhs
    on lhs.project = rhs.project
where lhs.country <> rhs.country
group by
    lhs.funder,
    lhs.country,
    rhs.country;

compute stats indi_funder_country_collab;
--
-- create table indi_result_country_collab stored as parquet as
-- with tmp as

View File

@ -170,7 +170,9 @@ compute stats TARGET.indi_result_has_cc_licence;
-- Copy the CC-licence-URL indicator, keeping only rows whose id exists in
-- the result table of the destination database.
create table TARGET.indi_result_has_cc_licence_url stored as parquet as
select *
from SOURCE.indi_result_has_cc_licence_url src
where exists (
    select 1
    from TARGET.result res
    where res.id = src.id
);

compute stats TARGET.indi_result_has_cc_licence_url;
-- create view TARGET.indi_funder_country_collab as select * from SOURCE.indi_funder_country_collab;
-- Collaboration indicators are exposed as pass-through views: every row of
-- the upstream table is visible, with no filtering against the result table.
create view TARGET.indi_funder_country_collab as
select * from SOURCE.indi_funder_country_collab;

create view TARGET.indi_project_collab_org as
select * from SOURCE.indi_project_collab_org;

create view TARGET.indi_project_collab_org_country as
select * from SOURCE.indi_project_collab_org_country;
-- Copy the ORCID indicator, keeping only rows whose id exists in the result
-- table of the destination database.
create table TARGET.indi_result_with_orcid stored as parquet as
select *
from SOURCE.indi_result_with_orcid src
where exists (
    select 1
    from TARGET.result res
    where res.id = src.id
);

compute stats TARGET.indi_result_with_orcid;