From b7cd2c6ca1a377097775d9dcc7ed6cfebdbd9728 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Wed, 20 Apr 2022 14:46:55 +0300 Subject: [PATCH 1/6] added open citations --- .../graph/stats/oozie_app/scripts/step13.sql | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql index a5839da11..aee66fd5e 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -80,4 +80,34 @@ where reltype='resultResult' and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE and r1.resulttype.classname != 'other' and r2.resulttype.classname != 'other' - and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; \ No newline at end of file + and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; + +create table ${stats_db_name}.result_citations_oc stored as parquet as +select substr(target, 4) as id, count(distinct substr(source, 4)) as citations +from ${openaire_db_name}.relation rel +join ${openaire_db_name}.result r1 on rel.source=r1.id +join ${openaire_db_name}.result r2 on r2.id=rel.target +where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:crosswalk:opencitations' + and reltype='resultResult' + and r1.resulttype.classname!=r2.resulttype.classname + and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE + and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE + and r1.resulttype.classname != 'other' + and r2.resulttype.classname != 'other' + and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE +group by 
substr(target, 4); + +create table ${stats_db_name}.result_references_oc stored as parquet as +select substr(source, 4) as id, count(distinct substr(target, 4)) as references +from ${openaire_db_name}.relation rel + join ${openaire_db_name}.result r1 on rel.source=r1.id + join ${openaire_db_name}.result r2 on r2.id=rel.target +where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:crosswalk:opencitations' + and reltype='resultResult' + and r1.resulttype.classname!=r2.resulttype.classname + and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE + and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE + and r1.resulttype.classname != 'other' + and r2.resulttype.classname != 'other' + and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE +group by substr(source, 4); \ No newline at end of file From cfbbcaf7c42c85f8bd9095ecd97c5adadf049bdf Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Fri, 6 May 2022 12:49:36 +0300 Subject: [PATCH 2/6] commented out indi_result_org_country_collab --- .../scripts/step16-createIndicatorsTables.sql | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 24e6bff7e..09b24f741 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -82,18 +82,18 @@ on r.id= tmp.id; compute stats indi_funded_result_with_fundref; -create table indi_result_org_country_collab stored as parquet as -with tmp as -(select o.id as id, o.country , 
ro.id as result,r.type from organization o -join result_organization ro on o.id=ro.organization -join result r on r.id=ro.id where o.country <> 'UNKNOWN') -select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations -from tmp as o1 -join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country; - -compute stats indi_result_org_country_collab; +-- create table indi_result_org_country_collab stored as parquet as +-- with tmp as +-- (select o.id as id, o.country , ro.id as result,r.type from organization o +-- join result_organization ro on o.id=ro.organization +-- join result r on r.id=ro.id where o.country <> 'UNKNOWN') +-- select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations +-- from tmp as o1 +-- join tmp as o2 on o1.result=o2.result +-- where o1.id<>o2.id and o1.country<>o2.country +-- group by o1.id, o1.type,o2.country; +-- +-- compute stats indi_result_org_country_collab; create table indi_result_org_collab stored as parquet as with tmp as From 61b4c19e6554b7b9ed53d1d1966240ce956c1211 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Fri, 6 May 2022 12:52:10 +0300 Subject: [PATCH 3/6] restored indi_result_org_country_collab, removed indi_result_org_collab --- .../scripts/step16-createIndicatorsTables.sql | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 09b24f741..c40618510 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -82,31 +82,31 @@ on r.id= tmp.id; compute stats indi_funded_result_with_fundref; --- create table indi_result_org_country_collab stored as parquet as --- with tmp as --- (select o.id as id, o.country , ro.id as result,r.type from organization o --- join result_organization ro on o.id=ro.organization --- join result r on r.id=ro.id where o.country <> 'UNKNOWN') --- select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations --- from tmp as o1 --- join tmp as o2 on o1.result=o2.result --- where o1.id<>o2.id and o1.country<>o2.country --- group by o1.id, o1.type,o2.country; --- --- compute stats indi_result_org_country_collab; - -create table indi_result_org_collab stored as parquet as +create table indi_result_org_country_collab stored as parquet as with tmp as -(select o.id, ro.id as result,r.type from organization o +(select o.id as id, o.country , ro.id as result,r.type from organization o join result_organization ro on o.id=ro.organization -join result r on r.id=ro.id) -select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaborations +join result r on r.id=ro.id where o.country <> 'UNKNOWN') +select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations from tmp as o1 join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id -group by o1.id, o2.id, o1.type; +where o1.id<>o2.id and o1.country<>o2.country +group by o1.id, o1.type,o2.country; -compute stats indi_result_org_collab; +compute stats indi_result_org_country_collab; + +-- create table indi_result_org_collab stored as parquet as +-- with tmp as +-- (select o.id, ro.id as result,r.type from organization o +-- join result_organization ro on o.id=ro.organization +-- join result r on r.id=ro.id) +-- select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaborations +-- from tmp as 
o1 +-- join tmp as o2 on o1.result=o2.result +-- where o1.id<>o2.id +-- group by o1.id, o2.id, o1.type; +-- +-- compute stats indi_result_org_collab; create table indi_funder_country_collab stored as parquet as with tmp as (select funder, project, country from organization_projects op From 23334479bb7e5219ae14541e41abd7ee3903a3e2 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Wed, 11 May 2022 13:05:52 +0300 Subject: [PATCH 4/6] removed yet another collab, added more orgs in monitor --- .../scripts/step16-createIndicatorsTables.sql | 24 ++++++------- .../scripts/step20-createMonitorDB.sql | 35 +++++++++++-------- 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index c40618510..db40cf973 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -82,18 +82,18 @@ on r.id= tmp.id; compute stats indi_funded_result_with_fundref; -create table indi_result_org_country_collab stored as parquet as -with tmp as -(select o.id as id, o.country , ro.id as result,r.type from organization o -join result_organization ro on o.id=ro.organization -join result r on r.id=ro.id where o.country <> 'UNKNOWN') -select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations -from tmp as o1 -join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country; - -compute stats indi_result_org_country_collab; +-- create table indi_result_org_country_collab stored as parquet as +-- with tmp as +-- (select 
o.id as id, o.country , ro.id as result,r.type from organization o +-- join result_organization ro on o.id=ro.organization +-- join result r on r.id=ro.id where o.country <> 'UNKNOWN') +-- select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations +-- from tmp as o1 +-- join tmp as o2 on o1.result=o2.result +-- where o1.id<>o2.id and o1.country<>o2.country +-- group by o1.id, o1.type,o2.country; +-- +-- compute stats indi_result_org_country_collab; -- create table indi_result_org_collab stored as parquet as -- with tmp as diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index bcc9f0b5d..4dd434101 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -18,20 +18,27 @@ create table TARGET.result stored as parquet as select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) union all select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( - 'openorgs____::759d59f05d77188faee99b7493b46805', - 'openorgs____::b84450f9864182c67b8611b5593f4250', - 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', - 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', - 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', - 'openorgs____::d169c7407dd417152596908d48c11460', - 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', - 'openorgs____::2fb1e47b4612688d9de9169d579939a7', - 'openorgs____::759d59f05d77188faee99b7493b46805', - 'openorgs____::cad284878801b9465fa51a95b1d779db', - 
'openorgs____::eadc8da90a546e98c03f896661a2e4d4', - 'openorgs____::c0286313e36479eff8676dba9b724b40' - -- ,'openorgs____::c80a8243a5e5c620d7931c88d93bf17a' -- Paris Diderot - ) )) foo; + 'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC" + 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council + 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ?? + 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University + 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade + 'openorgs____::2fb1e47b4612688d9de9169d579939a7', --University of Helsinki + 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho + 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid + 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen + 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens + -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot + 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University + 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark + 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin + 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt + 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven + 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape + 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute + 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University + 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg + 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) ) )) foo; compute stats TARGET.result; create table 
TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); From c25134f28d8586591284a1740f8bd89b7cc0b0d5 Mon Sep 17 00:00:00 2001 From: antleb Date: Thu, 12 May 2022 14:55:47 +0300 Subject: [PATCH 5/6] fixed typo --- .../graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 4dd434101..2dde7171f 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -38,7 +38,8 @@ create table TARGET.result stored as parquet as 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg - 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) ) )) foo; + 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) + ) )) foo; compute stats TARGET.result; create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); From 3fc9efeab6559edc2fd0ad839473a6bbc03c89f5 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Fri, 13 May 2022 14:28:13 +0300 Subject: [PATCH 6/6] fixed typo, added open citations and apcs in monitor --- .../oozie_app/scripts/step20-createMonitorDB.sql | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 
deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 4dd434101..3cf155869 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -38,15 +38,25 @@ create table TARGET.result stored as parquet as 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg - 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) ) )) foo; + 'openorgs____::6445d7758d3a40c4d997953b6632a368' --National Institute of Informatics (NII) + ) )) foo; compute stats TARGET.result; create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_citations; +create table TARGET.result_references_oc stored as parquet as select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_references_oc; + +create table TARGET.result_citations_oc stored as parquet as select * from SOURCE.result_citations_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_citations_oc; + create table TARGET.result_classifications stored as parquet as select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_classifications; +create table 
TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_apc; + create table TARGET.result_concepts stored as parquet as select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_concepts;