forked from D-Net/dnet-hadoop
Merge pull request '[stats wf] latest version of indicators + added FOS classification' (#232) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#232
This commit is contained in:
commit
27681cf6bf
File diff suppressed because it is too large
Load Diff
|
@ -38,7 +38,14 @@ create table TARGET.result stored as parquet as
|
||||||
'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
|
'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
|
||||||
'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
|
'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
|
||||||
'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
|
'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
|
||||||
'openorgs____::6445d7758d3a40c4d997953b6632a368' --National Institute of Informatics (NII)
|
'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
|
||||||
|
|
||||||
|
'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
|
||||||
|
'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- University of Warsaw
|
||||||
|
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University of Thessaly
|
||||||
|
'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
|
||||||
|
'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
|
||||||
|
'openorgs____::4ac562f0376fce3539504567649cb373' -- University of Patras
|
||||||
) )) foo;
|
) )) foo;
|
||||||
compute stats TARGET.result;
|
compute stats TARGET.result;
|
||||||
|
|
||||||
|
@ -107,6 +114,9 @@ compute stats TARGET.result_sources;
|
||||||
create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||||
compute stats TARGET.result_topics;
|
compute stats TARGET.result_topics;
|
||||||
|
|
||||||
|
-- Copy into TARGET.result_fos only those SOURCE.result_fos rows whose id
-- also exists in TARGET.result (same filtering pattern as result_topics).
create table TARGET.result_fos stored as parquet as select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||||
|
-- Gather table statistics for the newly created table.
compute stats TARGET.result_fos;
|
||||||
|
|
||||||
create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
|
create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
|
||||||
create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
|
create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
|
||||||
create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;
|
create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;
|
||||||
|
|
|
@ -11,13 +11,13 @@ where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false;
|
||||||
CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
|
||||||
SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization
|
SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization
|
||||||
from ${openaire_db_name}.relation r
|
from ${openaire_db_name}.relation r
|
||||||
WHERE r.reltype = 'projectOrganization'
|
WHERE r.reltype = 'projectOrganization' and r.source like '40|%'
|
||||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
|
||||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
|
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
|
||||||
FROM ${openaire_db_name}.relation r
|
FROM ${openaire_db_name}.relation r
|
||||||
WHERE r.reltype = 'resultProject'
|
WHERE r.reltype = 'resultProject' and r.target like '40|%'
|
||||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
||||||
|
|
||||||
create table ${stats_db_name}.project_classification STORED AS PARQUET as
|
create table ${stats_db_name}.project_classification STORED AS PARQUET as
|
||||||
|
|
|
@ -123,6 +123,16 @@ UNION ALL
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.otherresearchproduct_topics;
|
FROM ${stats_db_name}.otherresearchproduct_topics;
|
||||||
|
|
||||||
|
-- Builds ${stats_db_name}.result_fos with columns (id, lvl1, lvl2, lvl3).
-- All three CTEs read ${stats_db_name}.result_topics rows whose type is
-- 'Fields of Science and Technology classification' and differ only in the
-- LIKE pattern on topic ('_' matches exactly one character):
--   lvl1: two characters, then a space;
--   lvl2: four characters, then a space;
--   lvl3: six characters, then a space.
-- presumably the leading characters are the hierarchical FOS code and the
-- rest is the label -- TODO confirm against the result_topics data.
-- Rows are chained per result id by code prefix: lvl2 must share its first
-- 2 characters with lvl1, and lvl3 must share its first 4 characters with lvl2.
-- The joins are inner joins, so a result yields a row only when it has a
-- matching topic at all three levels.
create table ${stats_db_name}.result_fos stored as parquet as
|
||||||
|
with
|
||||||
|
lvl1 as (select id, topic from ${stats_db_name}.result_topics where topic like '__ %' and type='Fields of Science and Technology classification'),
|
||||||
|
lvl2 as (select id, topic from ${stats_db_name}.result_topics where topic like '____ %' and type='Fields of Science and Technology classification'),
|
||||||
|
lvl3 as (select id, topic from ${stats_db_name}.result_topics where topic like '______ %' and type='Fields of Science and Technology classification')
|
||||||
|
select lvl1.id, lvl1.topic as lvl1, lvl2.topic as lvl2, lvl3.topic as lvl3
|
||||||
|
from lvl1
|
||||||
|
join lvl2 on lvl1.id=lvl2.id and substr(lvl2.topic, 1, 2)=substr(lvl1.topic, 1, 2)
|
||||||
|
join lvl3 on lvl3.id=lvl1.id and substr(lvl3.topic, 1, 4)=substr(lvl2.topic, 1, 4);
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS
|
||||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
||||||
FROM ${openaire_db_name}.relation r
|
FROM ${openaire_db_name}.relation r
|
||||||
|
@ -135,3 +145,4 @@ select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS da
|
||||||
FROM ${stats_db_name}.result r
|
FROM ${stats_db_name}.result r
|
||||||
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
|
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
|
||||||
JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id;
|
JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue