From 455f2e1e07a2e5382d99e8c9a7b9e0494fab6c61 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Fri, 15 Mar 2024 14:56:39 +0100 Subject: [PATCH] apply commits from master --- .../dhp/collection/plugin/base/BaseCollectorPlugin.java | 9 ++++++++- .../eu/dnetlib/dhp/collection/plugin/base/sql/base.sql | 2 +- .../dhp/collection/plugin/base/sql/opendoar-accepted.sql | 8 +++++--- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java index 0cd68b12a..42341b406 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java @@ -45,15 +45,22 @@ public class BaseCollectorPlugin implements CollectorPlugin { @Override public Stream collect(final ApiDescriptor api, final AggregatorReport report) throws CollectorException { - // get path to file + // the path of the dump file on HDFS + // http://oai.base-search.net/initial_load/base_oaipmh_dump-current.tar + // it could be downloaded from iis-cdh5-test-gw.ocean.icm.edu.pl and then copied on HDFS final Path filePath = Optional .ofNullable(api.getBaseUrl()) .map(Path::new) .orElseThrow(() -> new CollectorException("missing baseUrl")); + // get the parameters for the connection to the OpenAIRE database. 
+ // the database is used to obtain the list of the datasources that the plugin will collect final String dbUrl = api.getParams().get("dbUrl"); final String dbUser = api.getParams().get("dbUser"); final String dbPassword = api.getParams().get("dbPassword"); + + // the types (comma-separated, empty value for all) that the plugin will collect, + // the types should be expressed in the format of the normalized types of BASE (for example 1,121,...) final String acceptedNormTypesString = api.getParams().get("acceptedNormTypes"); log.info("baseUrl: {}", filePath); diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/base.sql b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/base.sql index b9300f6a8..0303962fd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/base.sql +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/base.sql @@ -108,7 +108,7 @@ INSERT INTO dsm_apiparams( 'api_________::openaire____::base_search::dump@@acceptedNormTypes', 'api_________::openaire____::base_search::dump', 'acceptedNormTypes', - '' + '1,11,111,121,13,14,15,18,181,182,183,1A,6,7' ); COMMIT; \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql index b7dd835ee..7b3cb9610 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql @@ -2,6 +2,8 @@ select s.id as id from dsm_services s where collectedfrom = 'openaire____::opendoar' and jurisdiction = 'Institutional' -and s.id not in ( - select service from dsm_api 
where coalesce(compatibility_override, compatibility) like '%openaire%' or last_collection_total > 0 -); \ No newline at end of file +and s.id in ( + select service from dsm_api where coalesce(compatibility_override, compatibility) = 'driver' or coalesce(compatibility_override, compatibility) = 'UNKNOWN' +) and s.id not in ( + select service from dsm_api where coalesce(compatibility_override, compatibility) like '%openaire%' +);