diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java index 6140ed2c0..bc7bdab97 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java @@ -64,27 +64,27 @@ public class BaseCollectorPlugin implements CollectorPlugin { throw new CollectorException(e); } - final Set excludedOpendoarIds = findExcludedOpendoarIds(dbUrl, dbUser, dbPassword); + final Set acceptedOpendoarIds = findAcceptedOpendoarIds(dbUrl, dbUser, dbPassword); final Iterator iterator = new BaseCollectorIterator(this.fs, filePath, report); final Spliterator spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED); return StreamSupport .stream(spliterator, false) - .filter(doc -> filterXml(doc, excludedOpendoarIds, report)); + .filter(doc -> filterXml(doc, acceptedOpendoarIds, report)); } - private Set findExcludedOpendoarIds(final String dbUrl, final String dbUser, final String dbPassword) throws CollectorException { - final Set excluded = new HashSet<>(); + private Set findAcceptedOpendoarIds(final String dbUrl, final String dbUser, final String dbPassword) throws CollectorException { + final Set accepted = new HashSet<>(); try (final DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) { final String sql = IOUtils .toString(BaseAnalyzerJob.class - .getResourceAsStream("/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-ds-exclusion.sql")); + .getResourceAsStream("/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql")); dbClient.processResults(sql, row -> { try { - excluded.add(row.getString("id")); + accepted.add(row.getString("id")); } catch (final SQLException e) { log.error("Error in SQL", e); throw new RuntimeException("Error in SQL", e); @@ -94,13 +94,13 @@ public class BaseCollectorPlugin implements CollectorPlugin { log.error("Error accessong SQL", e); throw new CollectorException("Error accessong SQL", e); } - return excluded; + return accepted; } - private boolean filterXml(final String xml, final Set excludedOpendoarIds, final AggregatorReport report) { + private boolean filterXml(final String xml, final Set acceptedOpendoarIds, final AggregatorReport report) { try { final String id = DocumentHelper.parseText(xml).valueOf("//*[local-name='collection']/@opendoar_id").trim(); - return (StringUtils.isNotBlank(id) && !excludedOpendoarIds.contains("opendoar____::" + id.trim())); + return (StringUtils.isNotBlank(id) && acceptedOpendoarIds.contains("opendoar____::" + id.trim())); } catch (final DocumentException e) { log.error("Error parsing document", e); throw new RuntimeException("Error parsing document", e); diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql new file mode 100644 index 000000000..b7dd835ee --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql @@ -0,0 +1,7 @@ +select s.id as id +from dsm_services s +where collectedfrom = 'openaire____::opendoar' +and jurisdiction = 'Institutional' +and s.id not in ( + select service from dsm_api where coalesce(compatibility_override, compatibility) like '%openaire%' or last_collection_total > 0 +); \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-ds-exclusion.sql b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-ds-exclusion.sql deleted file mode 100644 index 6a961263c..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-ds-exclusion.sql +++ /dev/null @@ -1,8 +0,0 @@ -select distinct - s.id as id -from - dsm_services s - join dsm_api a on (s.id = a.service) -where - collectedfrom = 'openaire____::opendoar' - and (coalesce(a.compatibility_override, a.compatibility) like '%openaire%' or a.last_collection_total > 0); \ No newline at end of file