From b7cd2c6ca1a377097775d9dcc7ed6cfebdbd9728 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Wed, 20 Apr 2022 14:46:55 +0300 Subject: [PATCH 01/11] added open citations --- .../graph/stats/oozie_app/scripts/step13.sql | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql index a5839da11e..aee66fd5e5 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -80,4 +80,34 @@ where reltype='resultResult' and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE and r1.resulttype.classname != 'other' and r2.resulttype.classname != 'other' - and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; \ No newline at end of file + and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; + +create table ${stats_db_name}.result_citations_oc stored as parquet as +select substr(target, 4) as id, count(distinct substr(source, 4)) as citations +from ${openaire_db_name}.relation rel +join ${openaire_db_name}.result r1 on rel.source=r1.id +join ${openaire_db_name}.result r2 on r2.id=rel.target +where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:crosswalk:opencitations' + and reltype='resultResult' + and r1.resulttype.classname!=r2.resulttype.classname + and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE + and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE + and r1.resulttype.classname != 'other' + and r2.resulttype.classname != 'other' + and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE +group by substr(target, 4); + +create table ${stats_db_name}.result_references_oc stored as parquet as +select substr(source, 4) as id, count(distinct substr(target, 4)) as references +from ${openaire_db_name}.relation rel + join ${openaire_db_name}.result r1 on rel.source=r1.id + join ${openaire_db_name}.result r2 on r2.id=rel.target +where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:crosswalk:opencitations' + and reltype='resultResult' + and r1.resulttype.classname!=r2.resulttype.classname + and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE + and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE + and r1.resulttype.classname != 'other' + and r2.resulttype.classname != 'other' + and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE +group by substr(source, 4); \ No newline at end of file From cfbbcaf7c42c85f8bd9095ecd97c5adadf049bdf Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Fri, 6 May 2022 12:49:36 +0300 Subject: [PATCH 02/11] commented out indi_result_org_country_collab --- .../scripts/step16-createIndicatorsTables.sql | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 24e6bff7e0..09b24f741f 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -82,18 +82,18 @@ on r.id= tmp.id; compute stats indi_funded_result_with_fundref; -create table indi_result_org_country_collab stored as parquet as -with tmp as -(select o.id as id, o.country , ro.id as result,r.type from organization o -join result_organization ro on o.id=ro.organization -join result r on r.id=ro.id where o.country <> 'UNKNOWN') -select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations -from tmp as o1 -join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country; - -compute stats indi_result_org_country_collab; +-- create table indi_result_org_country_collab stored as parquet as +-- with tmp as +-- (select o.id as id, o.country , ro.id as result,r.type from organization o +-- join result_organization ro on o.id=ro.organization +-- join result r on r.id=ro.id where o.country <> 'UNKNOWN') +-- select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations +-- from tmp as o1 +-- join tmp as o2 on o1.result=o2.result +-- where o1.id<>o2.id and o1.country<>o2.country +-- group by o1.id, o1.type,o2.country; +-- +-- compute stats indi_result_org_country_collab; create table indi_result_org_collab stored as parquet as with tmp as From 61b4c19e6554b7b9ed53d1d1966240ce956c1211 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Fri, 6 May 2022 12:52:10 +0300 Subject: [PATCH 03/11] restored indi_result_org_country_collab, removed indi_result_org_collab --- .../scripts/step16-createIndicatorsTables.sql | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 09b24f741f..c406185109 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -82,31 +82,31 @@ on r.id= tmp.id; compute stats indi_funded_result_with_fundref; --- create table indi_result_org_country_collab stored as parquet as --- with tmp as --- (select o.id as id, o.country , ro.id as result,r.type from organization o --- join result_organization ro on o.id=ro.organization --- join result r on r.id=ro.id where o.country <> 'UNKNOWN') --- select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations --- from tmp as o1 --- join tmp as o2 on o1.result=o2.result --- where o1.id<>o2.id and o1.country<>o2.country --- group by o1.id, o1.type,o2.country; --- --- compute stats indi_result_org_country_collab; - -create table indi_result_org_collab stored as parquet as +create table indi_result_org_country_collab stored as parquet as with tmp as -(select o.id, ro.id as result,r.type from organization o +(select o.id as id, o.country , ro.id as result,r.type from organization o join result_organization ro on o.id=ro.organization -join result r on r.id=ro.id) -select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaborations +join result r on r.id=ro.id where o.country <> 'UNKNOWN') +select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations from tmp as o1 join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id -group by o1.id, o2.id, o1.type; +where o1.id<>o2.id and o1.country<>o2.country +group by o1.id, o1.type,o2.country; -compute stats indi_result_org_collab; +compute stats indi_result_org_country_collab; + +-- create table indi_result_org_collab stored as parquet as +-- with tmp as +-- (select o.id, ro.id as result,r.type from organization o +-- join result_organization ro on o.id=ro.organization +-- join result r on r.id=ro.id) +-- select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaborations +-- from tmp as o1 +-- join tmp as o2 on o1.result=o2.result +-- where o1.id<>o2.id +-- group by o1.id, o2.id, o1.type; +-- +-- compute stats indi_result_org_collab; create table indi_funder_country_collab stored as parquet as with tmp as (select funder, project, country from organization_projects op From 23334479bb7e5219ae14541e41abd7ee3903a3e2 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Wed, 11 May 2022 13:05:52 +0300 Subject: [PATCH 04/11] removed yet another collab, added more orgs in monitor --- .../scripts/step16-createIndicatorsTables.sql | 24 ++++++------- .../scripts/step20-createMonitorDB.sql | 35 +++++++++++-------- 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index c406185109..db40cf9731 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -82,18 +82,18 @@ on r.id= tmp.id; compute stats indi_funded_result_with_fundref; -create table indi_result_org_country_collab stored as parquet as -with tmp as -(select o.id as id, o.country , ro.id as result,r.type from organization o -join result_organization ro on o.id=ro.organization -join result r on r.id=ro.id where o.country <> 'UNKNOWN') -select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations -from tmp as o1 -join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country; - -compute stats indi_result_org_country_collab; +-- create table indi_result_org_country_collab stored as parquet as +-- with tmp as +-- (select o.id as id, o.country , ro.id as result,r.type from organization o +-- join result_organization ro on o.id=ro.organization +-- join result r on r.id=ro.id where o.country <> 'UNKNOWN') +-- select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations +-- from tmp as o1 +-- join tmp as o2 on o1.result=o2.result +-- where o1.id<>o2.id and o1.country<>o2.country +-- group by o1.id, o1.type,o2.country; +-- +-- compute stats indi_result_org_country_collab; -- create table indi_result_org_collab stored as parquet as -- with tmp as diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index bcc9f0b5d7..4dd434101a 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -18,20 +18,27 @@ create table TARGET.result stored as parquet as select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) union all select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( - 'openorgs____::759d59f05d77188faee99b7493b46805', - 'openorgs____::b84450f9864182c67b8611b5593f4250', - 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', - 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', - 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', - 'openorgs____::d169c7407dd417152596908d48c11460', - 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', - 'openorgs____::2fb1e47b4612688d9de9169d579939a7', - 'openorgs____::759d59f05d77188faee99b7493b46805', - 'openorgs____::cad284878801b9465fa51a95b1d779db', - 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', - 'openorgs____::c0286313e36479eff8676dba9b724b40' - -- ,'openorgs____::c80a8243a5e5c620d7931c88d93bf17a' -- Paris Diderot - ) )) foo; + 'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC" + 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council + 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ?? + 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University + 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade + 'openorgs____::2fb1e47b4612688d9de9169d579939a7', --University of Helsinki + 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho + 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid + 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen + 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens + -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot + 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University + 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark + 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin + 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt + 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven + 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape + 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute + 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University + 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg + 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) ) )) foo; compute stats TARGET.result; create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); From c25134f28d8586591284a1740f8bd89b7cc0b0d5 Mon Sep 17 00:00:00 2001 From: antleb Date: Thu, 12 May 2022 14:55:47 +0300 Subject: [PATCH 05/11] fixed typo --- .../graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 4dd434101a..2dde7171fb 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -38,7 +38,8 @@ create table TARGET.result stored as parquet as 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg - 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) ) )) foo; + 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) + ) )) foo; compute stats TARGET.result; create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); From 3fc9efeab6559edc2fd0ad839473a6bbc03c89f5 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Fri, 13 May 2022 14:28:13 +0300 Subject: [PATCH 06/11] fixed typo, addded open citations and apcs in monitor --- .../oozie_app/scripts/step20-createMonitorDB.sql | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 4dd434101a..3cf155869f 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -38,15 +38,25 @@ create table TARGET.result stored as parquet as 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg - 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) ) )) foo; + 'openorgs____::6445d7758d3a40c4d997953b6632a368' --National Institute of Informatics (NII) + ) )) foo; compute stats TARGET.result; create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_citations; +create table TARGET.result_references_oc stored as parquet as select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_references_oc; + +create table TARGET.result_citations_oc stored as parquet as select * from SOURCE.result_citations_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_citations_oc; + create table TARGET.result_classifications stored as parquet as select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_classifications; +create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_apc; + create table TARGET.result_concepts stored as parquet as select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_concepts; From f5207885e371e8e538069d2b301094bb0af185b0 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 17 May 2022 15:09:22 +0200 Subject: [PATCH 07/11] [EOSCTag] changed code to remove EOSC Jupyter Notebook and modified test to exclude galaxy + software from the tagging for Galaxy --- .../eu/dnetlib/dhp/bulktag/SparkEoscTag.java | 5 +++-- .../dnetlib/dhp/bulktag/EOSCTagJobTest.java | 21 ++++++------------- .../eosctag/jupyter/software/software_10.json | 2 +- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkEoscTag.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkEoscTag.java index e8c79e11da..b9de5dd111 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkEoscTag.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkEoscTag.java @@ -95,13 +95,14 @@ public class SparkEoscTag { if (containsCriteriaNotebook(s)) { sbject.add(EOSC_NOTEBOOK); - if (sbject.stream().anyMatch(sb -> sb.getValue().equals("EOSC Jupyter Notebook"))){ + if (sbject.stream().anyMatch(sb -> sb.getValue().equals("EOSC Jupyter Notebook"))) { sbject = sbject.stream().map(sb -> { - if (sb.getValue().equals("EOSC Jupyter Notebook")){ + if (sb.getValue().equals("EOSC Jupyter Notebook")) { return null; } return sb; }).filter(Objects::nonNull).collect(Collectors.toList()); + s.setSubject(sbject); } } if (containsCriteriaGalaxy(s)) { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java index 55d3939e1e..1ea2541576 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java @@ -132,7 +132,7 @@ public class EOSCTagJobTest { Assertions .assertEquals( - 2, tmp + 1, tmp .filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4")) .collect() .get(0) @@ -326,7 +326,7 @@ public class EOSCTagJobTest { Assertions .assertEquals( - 2, + 1, tmp .filter( s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow"))) @@ -352,21 +352,12 @@ public class EOSCTagJobTest { Assertions .assertEquals( - 6, tmp + 5, tmp .filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11")) .collect() .get(0) .getSubject() .size()); - Assertions - .assertTrue( - tmp - .filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11")) - .collect() - .get(0) - .getSubject() - .stream() - .anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow"))); Assertions .assertEquals( @@ -394,7 +385,7 @@ public class EOSCTagJobTest { Assertions .assertEquals( - 2, + 1, orp .filter( s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow"))) @@ -438,14 +429,14 @@ public class EOSCTagJobTest { Assertions .assertEquals( - 3, orp + 2, orp .filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72")) .collect() .get(0) .getSubject() .size()); Assertions - .assertTrue( + .assertFalse( orp .filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72")) .collect() diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/eosctag/jupyter/software/software_10.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/eosctag/jupyter/software/software_10.json index 2439dc1b64..2acc856a4a 100644 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/eosctag/jupyter/software/software_10.json +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/eosctag/jupyter/software/software_10.json @@ -1,4 +1,4 @@ -{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055698387,"id":"50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4","originalId":["od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"],"collectedfrom":[{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null}],"pid":[],"dateofcollection":"2019-01-24T16:45:07Z","dateoftransformation":"","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2019-01-23T18:34:35.459Z","altered":true,"baseURL":"http://oai.prodinra.inra.fr/ft","identifier":"oai:prodinra.inra.fr:442576","datestamp":"2018-11-12T00:00:00Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Bitteur, Sylvaine","name":"Sylvaine","surname":"Bitteur","rank":1,"pid":null,"affiliation":null},{"fullname":"Hassouna, Melynda","name":"Melynda","surname":"Hassouna","rank":2,"pid":null,"affiliation":null}],"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value": "jupyter Notebooks","qualifier": {"classid": "","classname": "","schemeid": "","schemename": "" },"dataInfo": {"invisible": false,"inferred": false,"deletedbyinference": false,"trust": "0.9","inferenceprovenance": "","provenanceaction": {"classid": "sysimport:crosswalk:repository","classname": "sysimport:crosswalk:repository","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"}}}],"title":[{"value":"Charte graphique et site web International Symposium EmiLi 2012","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[],"dateofacceptance":{"value":"2012-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[{"id":"http://zenodo.org/communities/dimpo"}],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by-nd/3.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|doajarticles::8cec81178926caaca531afbd8eb5d64c","value":"ProdInra","dataInfo":null},"url":["http://prodinra.inra.fr/record/442576"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null},"dateofacceptance":{"value":"2012-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"documentationUrl":[],"license":[],"codeRepositoryUrl":null,"programmingLanguage":null} +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055698387,"id":"50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4","originalId":["od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"],"collectedfrom":[{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null}],"pid":[],"dateofcollection":"2019-01-24T16:45:07Z","dateoftransformation":"","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2019-01-23T18:34:35.459Z","altered":true,"baseURL":"http://oai.prodinra.inra.fr/ft","identifier":"oai:prodinra.inra.fr:442576","datestamp":"2018-11-12T00:00:00Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Bitteur, Sylvaine","name":"Sylvaine","surname":"Bitteur","rank":1,"pid":null,"affiliation":null},{"fullname":"Hassouna, Melynda","name":"Melynda","surname":"Hassouna","rank":2,"pid":null,"affiliation":null}],"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value": "EOSC Jupyter Notebook","qualifier": {"classid": "","classname": "","schemeid": "","schemename": "" },"dataInfo": {"invisible": false,"inferred": false,"deletedbyinference": false,"trust": "0.9","inferenceprovenance": "","provenanceaction": {"classid": "sysimport:crosswalk:repository","classname": "sysimport:crosswalk:repository","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"}}}],"title":[{"value":"Charte graphique et site web International Symposium EmiLi 2012","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[],"dateofacceptance":{"value":"2012-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[{"id":"http://zenodo.org/communities/dimpo"}],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by-nd/3.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|doajarticles::8cec81178926caaca531afbd8eb5d64c","value":"ProdInra","dataInfo":null},"url":["http://prodinra.inra.fr/record/442576"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null},"dateofacceptance":{"value":"2012-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"documentationUrl":[],"license":[],"codeRepositoryUrl":null,"programmingLanguage":null} {"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055711745,"id":"50|od______1582::501b25d420f808c8eddcd9b16e917f11","originalId":["od______1582::501b25d420f808c8eddcd9b16e917f11"],"collectedfrom":[{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null}],"pid":[],"dateofcollection":"2019-01-24T16:45:07Z","dateoftransformation":"","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2019-01-23T19:54:07.667Z","altered":true,"baseURL":"http://oai.prodinra.inra.fr/ft","identifier":"oai:prodinra.inra.fr:255703","datestamp":"2018-03-20T00:00:00Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Petit R.J., Remy","name":"Remy","surname":"Petit R J","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"python","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"richesse allélique","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"gène polymorphe","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"loci","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"diversité des populations","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Contrib","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"This program provides a measure of the contribution of each population to total diversity (measured by H, the expected heterozygosity, or by R, the allelic richness). This contribution is splitted in two components: one due to the diversity of the population, the other due to its differentiation from the remaining populations. The program may also be used to obtain allelic richness after rarefaction (to a sample size chosen by the investigator) for a set of populations. It can be used in conjunction with the program haplodiv, based on the paper by Pons & Petit 1995, TAG 90, 462-470, which will provide standard errors for the diversity and differentiation parameters. The input file is a text file (see example: rartest.txt), where the first line indicates the number of haplotypes (here it is 18), the number of populations (here it is 4), and the rarefaction size (it should not be larger than the smallest population sample size; here the rarefaction size is 10, and the smallest sample size is 20). Then follows the data for each population (line), with the number of each haplotype in each population (don't use relative frequencies): 18 4 10 1 0 1 0 0 0 1 1 ...(18 columns) 0 1 2 1 1 0 13 0 ... 0 0 8 0 0 3 6 0 ... 1 0 9 0 0 3 7 1 Results can be seen in the output file (rartest.out here; to be printed horizontally). General measures are given first: within population diversity (Hs), total diversity (Ht), and Gst are given, followed by similar measures based on allelic richness. Then you get the results for each population : H, its standard error, allelic richness after rarefaction, the divergence from the other populations (DHs, DHt, DGst, see the paper in Conservation Biology), and the contributions Ct, Cs, Cd followed by the contributions for allelic richness measures. The program is written for an haploid gene but may be used for nuclear genes, assuming Hardy-Weinberg equilibrium. How to proceed when there are several loci? Do not take the mean across Gst or across Contributions. They are ratios, so you should take the mean of the numerator and the mean of the denominator separately. For the denominator: take the mean of hT and Rt-1 across loci. For the numerator: multiply the contributions by hT or Rt-1 (respectively for contributions to diversity or to allelic richness) and take the mean of these products across loci. Then compute the ratio of the two means.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2006-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[{"id":"http://zenodo.org/communities/covid_19_senacyt_abc_panama"},{"id":"covid-19"}],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by-sa/3.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|opendoar____::87ae6fb631f7c8a627e8e28785d9992d","value":"ProdInra","dataInfo":null},"url":["http://prodinra.inra.fr/record/255703"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null},"dateofacceptance":{"value":"2006-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"documentationUrl":[],"license":[],"codeRepositoryUrl":null,"programmingLanguage":null} {"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055718681,"id":"50|od______1582::581621232a561b7e8b4952b18b8b0e56","originalId":["od______1582::581621232a561b7e8b4952b18b8b0e56"],"collectedfrom":[{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null}],"pid":[],"dateofcollection":"2019-01-24T16:45:07Z","dateoftransformation":"","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2019-01-23T19:54:07.673Z","altered":true,"baseURL":"http://oai.prodinra.inra.fr/ft","identifier":"oai:prodinra.inra.fr:255707","datestamp":"2018-03-20T00:00:00Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Petit R.J., Remy","name":"Remy","surname":"Petit R J","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"python","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"notebook","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"gène polymorphe","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"différenciation génétique","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"loci","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"application informatique","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"diversité des populations","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"haploïde","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Logiciels Permut et cpSSR","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"THE PROGRAM PERMUT AND THE PROGRAM CpSSR BECOME ONLY ONE PROGRAM.[br/] When you run the program you can choose if you want to use permut or CpSSR.[br/][br/] README PERMUT[br/] This program is based on the papers (Pons & Petit Genetics 1996, 144:1237-1245) and (Burban et al. 1999, Mol Ecol 8, 1593-1602).[br/] It computes measures of diversity and differenciation from haploid population genetic data, when a measure of the distance between haplotypes is available, and test whether the differentiation and diversity measures differ from the equivalent measures that do not take into account the distances between haplotypes (ie, that consider all haplotypes equally divergent).[br/] The source file should be an ASCII file (its name should have 8 characters maximum: 12345678.txt) and should include the following information:[br/] First line :[br/] Number of cytotypes Number of populations Number of characters distinguishing the variants (for instance number of polymorphic fragments, or of polymorphic nucleotide sites). The program asks for the number of permutations to be made.[br/] see the example (\\ExamplePermut\\input.txt and \\ExamplePermut\\output.out).[br/] Then follows the number of individuals having a given cytotype (column) in a given population (row). Finally, and without interruption, provide the table of character states for all haplotypes, where each line corresponds to one haplotype, and each column to a character. No column should be empty (no missing haplotype) and each population (row) should be composed of AT LEAST 3 individuals![br/] The output file provides permutated values of Nst in a single row, and the value of the last 5% and last 1%. The mean of the permutated values is also given and should be close to the Gst value (by construction). To test if the observed Nst value is larger than the Gst, we count how many permutated values are larger than the observed Nst. If you have 5% of the permutated values greater than the observed value of Nst, then your test is not significant, otherwise it is and you know the P-value. This is akin to testing if Gst = Nst.[br/] [br/] README CpSSR :[br/] It computes measures of diversity and differenciation from haploid population genetic data, when the difference in number of repeats between alleles is available, and tests whether the differentiation and diversity measures differ from the equivalent measures when the distances between haplotypes is not considered (ie, when all haplotypes are considered equally divergent). The source file should be an ASCII file (its name should have 8 characters maximum: 12345678.txt) and should include the following information:[br/] First line :[br/] Number of cytotypes Number of populations Number of cpSSR loci. The program asks for the number of permutations to be made. See the example (\\ExampleCpSSR\\input.txt and \\ExamplePermut\\CpSSR.out).[br/] Then follows the number of individuals having a given haplotype (column) in a given population (row). Finally, and without interruption, provide the table of length variant states for all haplotypes, where each line corresponds to one haplotype, and each column to a character. No column should be empty (no missing haplotype) and each population (row) should be composed of AT LEAST 3 individuals![br/] The output file provides permutated values of Rst in a single row, and the value of the last 5% and last 1%. The mean of the permutated values is also given and should be close to the Gst value (by construction). To test if the observed Rst value is larger than the Gst, you count how many permutated values are larger than the observed Rst. If you have 5% of the permutated values greater than the observed value of Rst, then your test is not significant, otherwise it is and you know the P-value. This is akin to testing if Gst = Rst. I usually go for a one-sided test (i.e. I test if Rst>Gst, and not Rst<>Gst).","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2012-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[{"id":"http://zenodo.org/communities/euromixproject"}],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by-sa/3.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06","value":"ProdInra","dataInfo":null},"url":["http://prodinra.inra.fr/record/255707"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null},"dateofacceptance":{"value":"2012-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"documentationUrl":[],"license":[],"codeRepositoryUrl":null,"programmingLanguage":null} {"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055721330,"id":"50|od______1582::5aec1186054301b66c0c5dc35972a589","originalId":["od______1582::5aec1186054301b66c0c5dc35972a589"],"collectedfrom":[{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null}],"pid":[],"dateofcollection":"2019-01-24T16:45:07Z","dateoftransformation":"","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2019-01-23T18:54:28.567Z","altered":true,"baseURL":"http://oai.prodinra.inra.fr/ft","identifier":"oai:prodinra.inra.fr:402973","datestamp":"2018-03-19T00:00:00Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Muratorio, Sylvie","name":"Sylvie","surname":"Muratorio","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"notebook","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"modèle physiologique","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"approche génétique","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"castanea","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"fagus sylvatica","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"PDG Documentation, version 2","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by-sa/3.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null},"url":["http://prodinra.inra.fr/record/402973"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null},"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"documentationUrl":[],"license":[],"codeRepositoryUrl":null,"programmingLanguage":null} From c298c148cb285e73fab8079d63f6bbef4670f653 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 May 2022 09:11:46 +0200 Subject: [PATCH 08/11] [CountryPropagation] fix NPE issue --- .../SparkCountryPropagationJob.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 56aa953b46..25cd822480 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -102,21 +102,27 @@ public class SparkCountryPropagationJob { private static MapFunction, R> getCountryMergeFn() { return t -> { Optional.ofNullable(t._2()).ifPresent(r -> { - t._1().getCountry().addAll(merge(t._1().getCountry(), r.getCountrySet())); + if(Optional.ofNullable(t._1().getCountry()).isPresent()) + t._1().getCountry().addAll(merge(t._1().getCountry(), r.getCountrySet())); + else + t._1().setCountry(merge(null, t._2().getCountrySet())); }); return t._1(); }; } private static List merge(List c1, List c2) { - HashSet countries = c1 - .stream() - .map(Qualifier::getClassid) - .collect(Collectors.toCollection(HashSet::new)); + HashSet countries = new HashSet<>(); + if(Optional.ofNullable(c1).isPresent()){ + countries = c1.stream().map(Qualifier::getClassid) + .collect(Collectors.toCollection(HashSet::new)); + } + + HashSet finalCountries = countries; return c2 .stream() - .filter(c -> !countries.contains(c.getClassid())) + .filter(c -> !finalCountries.contains(c.getClassid())) .map(c -> getCountry(c.getClassid(), c.getClassname())) .collect(Collectors.toList()); } From 5e0b8f9b5fa27dc7e595c65753a682b12ec3e553 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 May 2022 09:15:53 +0200 Subject: [PATCH 09/11] [CountryPropagation] refactoring --- .../SparkCountryPropagationJob.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 25cd822480..d9f6433a07 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -102,7 +102,7 @@ public class SparkCountryPropagationJob { private static MapFunction, R> getCountryMergeFn() { return t -> { Optional.ofNullable(t._2()).ifPresent(r -> { - if(Optional.ofNullable(t._1().getCountry()).isPresent()) + if (Optional.ofNullable(t._1().getCountry()).isPresent()) t._1().getCountry().addAll(merge(t._1().getCountry(), r.getCountrySet())); else t._1().setCountry(merge(null, t._2().getCountrySet())); @@ -113,11 +113,12 @@ public class SparkCountryPropagationJob { private static List merge(List c1, List c2) { HashSet countries = new HashSet<>(); - if(Optional.ofNullable(c1).isPresent()){ - countries = c1.stream().map(Qualifier::getClassid) - .collect(Collectors.toCollection(HashSet::new)); - } - + if (Optional.ofNullable(c1).isPresent()) { + countries = c1 + .stream() + .map(Qualifier::getClassid) + .collect(Collectors.toCollection(HashSet::new)); + } HashSet finalCountries = countries; return c2 From db088cc69c6878e5242e5edb8b8d1ebb894a3136 Mon Sep 17 00:00:00 2001 From: antleb Date: Tue, 7 Jun 2022 04:04:28 +0300 Subject: [PATCH 10/11] fixed *_organization tables --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql | 1 + .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index e1c36cbc04..b5eba6111b 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -127,6 +127,7 @@ CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultOrganization' + and r.target like '50|%' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index fa3eca1a93..6fa0e6fdfb 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -93,7 +93,7 @@ where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r -WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; +WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.target like '20|%' and r.datainfo.invisible=false; -- datasource sources: -- where the datasource info have been collected from. From 574492c659ea7a6d8e54f4332ba76a6ad45555fd Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Thu, 9 Jun 2022 15:48:13 +0300 Subject: [PATCH 11/11] removed double result_apc table creation from monitor --- .../graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 5 ----- 1 file changed, 5 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 3cf155869f..7412910a91 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -107,11 +107,6 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; -create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.result_apc; - - - create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;