Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into beta

This commit is contained in:
Sandro La Bruzzo 2021-10-05 10:21:50 +02:00
commit f258bbb927
2 changed files with 48 additions and 2 deletions

View File

@ -13,4 +13,4 @@ export SHADOW=$3
echo "Creating observatory database" echo "Creating observatory database"
impala-shell -q "drop database if exists ${TARGET} cascade" impala-shell -q "drop database if exists ${TARGET} cascade"
impala-shell -q "create database if not exists ${TARGET}" impala-shell -q "create database if not exists ${TARGET}"
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f - impala-shell -d ${SOURCE} -q "show tables" --delimited | grep -iv roar | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f -

View File

@ -234,3 +234,49 @@ on p.id= tmp.id;
create table indi_pub_has_abstract stored as parquet as create table indi_pub_has_abstract stored as parquet as
select distinct publication.id, coalesce(abstract, 1) has_abstract select distinct publication.id, coalesce(abstract, 1) has_abstract
from publication; from publication;
create table indi_with_orcid stored as parquet as
select distinct r.id, coalesce(has_orcid, 0) as has_orcid
from result r
left outer join (select id, 1 as has_orcid from result_orcid) tmp
on r.id= tmp.id
create table indi_funded_result_with_fundref stored as parquet as
select distinct r.id, coalesce(fundref, 0) as fundref
from project_results r
left outer join (select distinct id, 1 as fundref from project_results
where provenance='Harvested') tmp
on r.id= tmp.id
create table indi_result_org_country_collab stored as parquet as
with tmp as
(select o.id as id, o.country , ro.id as result,r.type from organization o
join result_organization ro on o.id=ro.organization
join result r on r.id=ro.id where o.country <> 'UNKNOWN')
select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations
from tmp as o1
join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id and o1.country<>o2.country
group by o1.id, o1.type,o2.country
create table indi_result_org_collab stored as parquet as
with tmp as
(select o.id, ro.id as result,r.type from organization o
join result_organization ro on o.id=ro.organization
join result r on r.id=ro.id)
select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaborations
from tmp as o1
join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id
group by o1.id, o2.id, o1.type
create table indi_result_org_country_collab stored as parquet as
with tmp as
(select o.id as id, o.country , ro.id as result,r.type from organization o
join result_organization ro on o.id=ro.organization
join result r on r.id=ro.id where o.country <> 'UNKNOWN')
select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations
from tmp as o1
join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id and o1.country<>o2.country
group by o1.id, o1.type,o2.country