diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkEoscTag.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkEoscTag.java index e8c79e11d..b9de5dd11 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkEoscTag.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkEoscTag.java @@ -95,13 +95,14 @@ public class SparkEoscTag { if (containsCriteriaNotebook(s)) { sbject.add(EOSC_NOTEBOOK); - if (sbject.stream().anyMatch(sb -> sb.getValue().equals("EOSC Jupyter Notebook"))){ + if (sbject.stream().anyMatch(sb -> sb.getValue().equals("EOSC Jupyter Notebook"))) { sbject = sbject.stream().map(sb -> { - if (sb.getValue().equals("EOSC Jupyter Notebook")){ + if (sb.getValue().equals("EOSC Jupyter Notebook")) { return null; } return sb; }).filter(Objects::nonNull).collect(Collectors.toList()); + s.setSubject(sbject); } } if (containsCriteriaGalaxy(s)) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 56aa953b4..d9f6433a0 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -102,21 +102,28 @@ public class SparkCountryPropagationJob { private static MapFunction, R> getCountryMergeFn() { return t -> { Optional.ofNullable(t._2()).ifPresent(r -> { - t._1().getCountry().addAll(merge(t._1().getCountry(), r.getCountrySet())); + if (Optional.ofNullable(t._1().getCountry()).isPresent()) + t._1().getCountry().addAll(merge(t._1().getCountry(), r.getCountrySet())); + else + t._1().setCountry(merge(null, t._2().getCountrySet())); }); return t._1(); }; } private static List merge(List c1, List c2) { - HashSet countries = c1 - .stream() - .map(Qualifier::getClassid) - .collect(Collectors.toCollection(HashSet::new)); + HashSet countries = new HashSet<>(); + if (Optional.ofNullable(c1).isPresent()) { + countries = c1 + .stream() + .map(Qualifier::getClassid) + .collect(Collectors.toCollection(HashSet::new)); + } + HashSet finalCountries = countries; return c2 .stream() - .filter(c -> !countries.contains(c.getClassid())) + .filter(c -> !finalCountries.contains(c.getClassid())) .map(c -> getCountry(c.getClassid(), c.getClassname())) .collect(Collectors.toList()); } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java index 55d3939e1..1ea254157 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java @@ -132,7 +132,7 @@ public class EOSCTagJobTest { Assertions .assertEquals( - 2, tmp + 1, tmp .filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4")) .collect() .get(0) @@ -326,7 +326,7 @@ public class EOSCTagJobTest { Assertions .assertEquals( - 2, + 1, tmp .filter( s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow"))) @@ -352,21 +352,12 @@ public class EOSCTagJobTest { Assertions .assertEquals( - 6, tmp + 5, tmp .filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11")) .collect() .get(0) .getSubject() .size()); - Assertions - .assertTrue( - tmp - .filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11")) - .collect() - .get(0) - .getSubject() - .stream() - .anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow"))); Assertions .assertEquals( @@ -394,7 +385,7 @@ public class EOSCTagJobTest { Assertions .assertEquals( - 2, + 1, orp .filter( s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow"))) @@ -438,14 +429,14 @@ public class EOSCTagJobTest { Assertions .assertEquals( - 3, orp + 2, orp .filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72")) .collect() .get(0) .getSubject() .size()); Assertions - .assertTrue( + .assertFalse( orp .filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72")) .collect() diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/eosctag/jupyter/software/software_10.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/eosctag/jupyter/software/software_10.json index 2439dc1b6..2acc856a4 100644 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/eosctag/jupyter/software/software_10.json +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/eosctag/jupyter/software/software_10.json @@ -1,4 +1,4 @@ -{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055698387,"id":"50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4","originalId":["od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"],"collectedfrom":[{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null}],"pid":[],"dateofcollection":"2019-01-24T16:45:07Z","dateoftransformation":"","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2019-01-23T18:34:35.459Z","altered":true,"baseURL":"http://oai.prodinra.inra.fr/ft","identifier":"oai:prodinra.inra.fr:442576","datestamp":"2018-11-12T00:00:00Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Bitteur, Sylvaine","name":"Sylvaine","surname":"Bitteur","rank":1,"pid":null,"affiliation":null},{"fullname":"Hassouna, Melynda","name":"Melynda","surname":"Hassouna","rank":2,"pid":null,"affiliation":null}],"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value": "jupyter Notebooks","qualifier": {"classid": "","classname": "","schemeid": "","schemename": "" },"dataInfo": {"invisible": false,"inferred": false,"deletedbyinference": false,"trust": "0.9","inferenceprovenance": "","provenanceaction": {"classid": "sysimport:crosswalk:repository","classname": "sysimport:crosswalk:repository","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"}}}],"title":[{"value":"Charte graphique et site web International Symposium EmiLi 2012","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[],"dateofacceptance":{"value":"2012-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[{"id":"http://zenodo.org/communities/dimpo"}],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by-nd/3.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|doajarticles::8cec81178926caaca531afbd8eb5d64c","value":"ProdInra","dataInfo":null},"url":["http://prodinra.inra.fr/record/442576"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null},"dateofacceptance":{"value":"2012-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"documentationUrl":[],"license":[],"codeRepositoryUrl":null,"programmingLanguage":null} +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055698387,"id":"50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4","originalId":["od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"],"collectedfrom":[{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null}],"pid":[],"dateofcollection":"2019-01-24T16:45:07Z","dateoftransformation":"","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2019-01-23T18:34:35.459Z","altered":true,"baseURL":"http://oai.prodinra.inra.fr/ft","identifier":"oai:prodinra.inra.fr:442576","datestamp":"2018-11-12T00:00:00Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Bitteur, Sylvaine","name":"Sylvaine","surname":"Bitteur","rank":1,"pid":null,"affiliation":null},{"fullname":"Hassouna, Melynda","name":"Melynda","surname":"Hassouna","rank":2,"pid":null,"affiliation":null}],"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value": "EOSC Jupyter Notebook","qualifier": {"classid": "","classname": "","schemeid": "","schemename": "" },"dataInfo": {"invisible": false,"inferred": false,"deletedbyinference": false,"trust": "0.9","inferenceprovenance": "","provenanceaction": {"classid": "sysimport:crosswalk:repository","classname": "sysimport:crosswalk:repository","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"}}}],"title":[{"value":"Charte graphique et site web International Symposium EmiLi 2012","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[],"dateofacceptance":{"value":"2012-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[{"id":"http://zenodo.org/communities/dimpo"}],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by-nd/3.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|doajarticles::8cec81178926caaca531afbd8eb5d64c","value":"ProdInra","dataInfo":null},"url":["http://prodinra.inra.fr/record/442576"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null},"dateofacceptance":{"value":"2012-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"documentationUrl":[],"license":[],"codeRepositoryUrl":null,"programmingLanguage":null} {"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055711745,"id":"50|od______1582::501b25d420f808c8eddcd9b16e917f11","originalId":["od______1582::501b25d420f808c8eddcd9b16e917f11"],"collectedfrom":[{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null}],"pid":[],"dateofcollection":"2019-01-24T16:45:07Z","dateoftransformation":"","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2019-01-23T19:54:07.667Z","altered":true,"baseURL":"http://oai.prodinra.inra.fr/ft","identifier":"oai:prodinra.inra.fr:255703","datestamp":"2018-03-20T00:00:00Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Petit R.J., Remy","name":"Remy","surname":"Petit R J","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"python","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"richesse allélique","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"gène polymorphe","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"loci","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"diversité des populations","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Contrib","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"This program provides a measure of the contribution of each population to total diversity (measured by H, the expected heterozygosity, or by R, the allelic richness). This contribution is splitted in two components: one due to the diversity of the population, the other due to its differentiation from the remaining populations. The program may also be used to obtain allelic richness after rarefaction (to a sample size chosen by the investigator) for a set of populations. It can be used in conjunction with the program haplodiv, based on the paper by Pons & Petit 1995, TAG 90, 462-470, which will provide standard errors for the diversity and differentiation parameters. The input file is a text file (see example: rartest.txt), where the first line indicates the number of haplotypes (here it is 18), the number of populations (here it is 4), and the rarefaction size (it should not be larger than the smallest population sample size; here the rarefaction size is 10, and the smallest sample size is 20). Then follows the data for each population (line), with the number of each haplotype in each population (don't use relative frequencies): 18 4 10 1 0 1 0 0 0 1 1 ...(18 columns) 0 1 2 1 1 0 13 0 ... 0 0 8 0 0 3 6 0 ... 1 0 9 0 0 3 7 1 Results can be seen in the output file (rartest.out here; to be printed horizontally). General measures are given first: within population diversity (Hs), total diversity (Ht), and Gst are given, followed by similar measures based on allelic richness. Then you get the results for each population : H, its standard error, allelic richness after rarefaction, the divergence from the other populations (DHs, DHt, DGst, see the paper in Conservation Biology), and the contributions Ct, Cs, Cd followed by the contributions for allelic richness measures. The program is written for an haploid gene but may be used for nuclear genes, assuming Hardy-Weinberg equilibrium. How to proceed when there are several loci? Do not take the mean across Gst or across Contributions. They are ratios, so you should take the mean of the numerator and the mean of the denominator separately. For the denominator: take the mean of hT and Rt-1 across loci. For the numerator: multiply the contributions by hT or Rt-1 (respectively for contributions to diversity or to allelic richness) and take the mean of these products across loci. Then compute the ratio of the two means.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2006-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[{"id":"http://zenodo.org/communities/covid_19_senacyt_abc_panama"},{"id":"covid-19"}],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by-sa/3.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|opendoar____::87ae6fb631f7c8a627e8e28785d9992d","value":"ProdInra","dataInfo":null},"url":["http://prodinra.inra.fr/record/255703"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null},"dateofacceptance":{"value":"2006-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"documentationUrl":[],"license":[],"codeRepositoryUrl":null,"programmingLanguage":null} {"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055718681,"id":"50|od______1582::581621232a561b7e8b4952b18b8b0e56","originalId":["od______1582::581621232a561b7e8b4952b18b8b0e56"],"collectedfrom":[{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null}],"pid":[],"dateofcollection":"2019-01-24T16:45:07Z","dateoftransformation":"","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2019-01-23T19:54:07.673Z","altered":true,"baseURL":"http://oai.prodinra.inra.fr/ft","identifier":"oai:prodinra.inra.fr:255707","datestamp":"2018-03-20T00:00:00Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Petit R.J., Remy","name":"Remy","surname":"Petit R J","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"python","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"notebook","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"gène polymorphe","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"différenciation génétique","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"loci","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"application informatique","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"diversité des populations","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"haploïde","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Logiciels Permut et cpSSR","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"THE PROGRAM PERMUT AND THE PROGRAM CpSSR BECOME ONLY ONE PROGRAM.[br/] When you run the program you can choose if you want to use permut or CpSSR.[br/][br/] README PERMUT[br/] This program is based on the papers (Pons & Petit Genetics 1996, 144:1237-1245) and (Burban et al. 1999, Mol Ecol 8, 1593-1602).[br/] It computes measures of diversity and differenciation from haploid population genetic data, when a measure of the distance between haplotypes is available, and test whether the differentiation and diversity measures differ from the equivalent measures that do not take into account the distances between haplotypes (ie, that consider all haplotypes equally divergent).[br/] The source file should be an ASCII file (its name should have 8 characters maximum: 12345678.txt) and should include the following information:[br/] First line :[br/] Number of cytotypes Number of populations Number of characters distinguishing the variants (for instance number of polymorphic fragments, or of polymorphic nucleotide sites). The program asks for the number of permutations to be made.[br/] see the example (\\ExamplePermut\\input.txt and \\ExamplePermut\\output.out).[br/] Then follows the number of individuals having a given cytotype (column) in a given population (row). Finally, and without interruption, provide the table of character states for all haplotypes, where each line corresponds to one haplotype, and each column to a character. No column should be empty (no missing haplotype) and each population (row) should be composed of AT LEAST 3 individuals![br/] The output file provides permutated values of Nst in a single row, and the value of the last 5% and last 1%. The mean of the permutated values is also given and should be close to the Gst value (by construction). To test if the observed Nst value is larger than the Gst, we count how many permutated values are larger than the observed Nst. If you have 5% of the permutated values greater than the observed value of Nst, then your test is not significant, otherwise it is and you know the P-value. This is akin to testing if Gst = Nst.[br/] [br/] README CpSSR :[br/] It computes measures of diversity and differenciation from haploid population genetic data, when the difference in number of repeats between alleles is available, and tests whether the differentiation and diversity measures differ from the equivalent measures when the distances between haplotypes is not considered (ie, when all haplotypes are considered equally divergent). The source file should be an ASCII file (its name should have 8 characters maximum: 12345678.txt) and should include the following information:[br/] First line :[br/] Number of cytotypes Number of populations Number of cpSSR loci. The program asks for the number of permutations to be made. See the example (\\ExampleCpSSR\\input.txt and \\ExamplePermut\\CpSSR.out).[br/] Then follows the number of individuals having a given haplotype (column) in a given population (row). Finally, and without interruption, provide the table of length variant states for all haplotypes, where each line corresponds to one haplotype, and each column to a character. No column should be empty (no missing haplotype) and each population (row) should be composed of AT LEAST 3 individuals![br/] The output file provides permutated values of Rst in a single row, and the value of the last 5% and last 1%. The mean of the permutated values is also given and should be close to the Gst value (by construction). To test if the observed Rst value is larger than the Gst, you count how many permutated values are larger than the observed Rst. If you have 5% of the permutated values greater than the observed value of Rst, then your test is not significant, otherwise it is and you know the P-value. This is akin to testing if Gst = Rst. I usually go for a one-sided test (i.e. I test if Rst>Gst, and not Rst<>Gst).","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2012-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[{"id":"http://zenodo.org/communities/euromixproject"}],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by-sa/3.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06","value":"ProdInra","dataInfo":null},"url":["http://prodinra.inra.fr/record/255707"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null},"dateofacceptance":{"value":"2012-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"documentationUrl":[],"license":[],"codeRepositoryUrl":null,"programmingLanguage":null} {"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055721330,"id":"50|od______1582::5aec1186054301b66c0c5dc35972a589","originalId":["od______1582::5aec1186054301b66c0c5dc35972a589"],"collectedfrom":[{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null}],"pid":[],"dateofcollection":"2019-01-24T16:45:07Z","dateoftransformation":"","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2019-01-23T18:54:28.567Z","altered":true,"baseURL":"http://oai.prodinra.inra.fr/ft","identifier":"oai:prodinra.inra.fr:402973","datestamp":"2018-03-19T00:00:00Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Muratorio, Sylvie","name":"Sylvie","surname":"Muratorio","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"notebook","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"modèle physiologique","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"approche génétique","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"castanea","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"fagus sylvatica","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"PDG Documentation, version 2","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by-sa/3.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null},"url":["http://prodinra.inra.fr/record/402973"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::1582","value":"ProdInra","dataInfo":null},"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"documentationUrl":[],"license":[],"codeRepositoryUrl":null,"programmingLanguage":null} diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql index a5839da11..aee66fd5e 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -80,4 +80,34 @@ where reltype='resultResult' and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE and r1.resulttype.classname != 'other' and r2.resulttype.classname != 'other' - and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; \ No newline at end of file + and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; + +create table ${stats_db_name}.result_citations_oc stored as parquet as +select substr(target, 4) as id, count(distinct substr(source, 4)) as citations +from ${openaire_db_name}.relation rel +join ${openaire_db_name}.result r1 on rel.source=r1.id +join ${openaire_db_name}.result r2 on r2.id=rel.target +where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:crosswalk:opencitations' + and reltype='resultResult' + and r1.resulttype.classname!=r2.resulttype.classname + and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE + and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE + and r1.resulttype.classname != 'other' + and r2.resulttype.classname != 'other' + and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE +group by substr(target, 4); + +create table ${stats_db_name}.result_references_oc stored as parquet as +select substr(source, 4) as id, count(distinct substr(target, 4)) as references +from ${openaire_db_name}.relation rel + join ${openaire_db_name}.result r1 on rel.source=r1.id + join ${openaire_db_name}.result r2 on r2.id=rel.target +where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:crosswalk:opencitations' + and reltype='resultResult' + and r1.resulttype.classname!=r2.resulttype.classname + and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE + and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE + and r1.resulttype.classname != 'other' + and r2.resulttype.classname != 'other' + and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE +group by substr(source, 4); \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 24e6bff7e..db40cf973 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -82,31 +82,31 @@ on r.id= tmp.id; compute stats indi_funded_result_with_fundref; -create table indi_result_org_country_collab stored as parquet as -with tmp as -(select o.id as id, o.country , ro.id as result,r.type from organization o -join result_organization ro on o.id=ro.organization -join result r on r.id=ro.id where o.country <> 'UNKNOWN') -select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations -from tmp as o1 -join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country; +-- create table indi_result_org_country_collab stored as parquet as +-- with tmp as +-- (select o.id as id, o.country , ro.id as result,r.type from organization o +-- join result_organization ro on o.id=ro.organization +-- join result r on r.id=ro.id where o.country <> 'UNKNOWN') +-- select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations +-- from tmp as o1 +-- join tmp as o2 on o1.result=o2.result +-- where o1.id<>o2.id and o1.country<>o2.country +-- group by o1.id, o1.type,o2.country; +-- +-- compute stats indi_result_org_country_collab; -compute stats indi_result_org_country_collab; - -create table indi_result_org_collab stored as parquet as -with tmp as -(select o.id, ro.id as result,r.type from organization o -join result_organization ro on o.id=ro.organization -join result r on r.id=ro.id) -select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaborations -from tmp as o1 -join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id -group by o1.id, o2.id, o1.type; - -compute stats indi_result_org_collab; +-- create table indi_result_org_collab stored as parquet as +-- with tmp as +-- (select o.id, ro.id as result,r.type from organization o +-- join result_organization ro on o.id=ro.organization +-- join result r on r.id=ro.id) +-- select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaborations +-- from tmp as o1 +-- join tmp as o2 on o1.result=o2.result +-- where o1.id<>o2.id +-- group by o1.id, o2.id, o1.type; +-- +-- compute stats indi_result_org_collab; create table indi_funder_country_collab stored as parquet as with tmp as (select funder, project, country from organization_projects op diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index bcc9f0b5d..7412910a9 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -18,28 +18,45 @@ create table TARGET.result stored as parquet as select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) union all select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( - 'openorgs____::759d59f05d77188faee99b7493b46805', - 'openorgs____::b84450f9864182c67b8611b5593f4250', - 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', - 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', - 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', - 'openorgs____::d169c7407dd417152596908d48c11460', - 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', - 'openorgs____::2fb1e47b4612688d9de9169d579939a7', - 'openorgs____::759d59f05d77188faee99b7493b46805', - 'openorgs____::cad284878801b9465fa51a95b1d779db', - 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', - 'openorgs____::c0286313e36479eff8676dba9b724b40' - -- ,'openorgs____::c80a8243a5e5c620d7931c88d93bf17a' -- Paris Diderot - ) )) foo; + 'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC" + 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council + 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ?? + 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University + 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade + 'openorgs____::2fb1e47b4612688d9de9169d579939a7', --University of Helsinki + 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho + 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid + 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen + 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens + -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot + 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University + 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark + 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin + 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt + 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven + 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape + 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute + 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University + 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg + 'openorgs____::6445d7758d3a40c4d997953b6632a368' --National Institute of Informatics (NII) + ) )) foo; compute stats TARGET.result; create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_citations; +create table TARGET.result_references_oc stored as parquet as select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_references_oc; + +create table TARGET.result_citations_oc stored as parquet as select * from SOURCE.result_citations_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_citations_oc; + create table TARGET.result_classifications stored as parquet as select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_classifications; +create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_apc; + create table TARGET.result_concepts stored as parquet as select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_concepts; @@ -90,11 +107,6 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; -create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.result_apc; - - - create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index e1c36cbc0..b5eba6111 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -127,6 +127,7 @@ CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultOrganization' + and r.target like '50|%' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index fa3eca1a9..6fa0e6fdf 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -93,7 +93,7 @@ where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r -WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; +WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.target like '20|%' and r.datainfo.invisible=false; -- datasource sources: -- where the datasource info have been collected from.