diff --git a/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java b/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java index 3a4edce..2120ee2 100644 --- a/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java +++ b/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java @@ -118,15 +118,15 @@ public class UrlsServiceImpl implements UrlsService { " join " + DatabaseConnector.databaseName + ".datasource d on d.id=p.datasourceid\n" + // This is needed for the "d.allow_harvest=true" check later on. " left outer join " + DatabaseConnector.databaseName + ".publication_boost pb\n" + " on p.id=pb.id\n" + - " left outer join (select count(a.id) as counts, a.id from " + DatabaseConnector.databaseName + ".attempt a group by a.id) as attempts\n" + - " on attempts.id=p.id\n" + - " left outer join (\n" + - " select a.id, a.original_url from " + DatabaseConnector.databaseName + ".assignment a\n" + + " left outer join (select count(at.original_url) as counts, at.original_url from " + DatabaseConnector.databaseName + ".attempt at group by at.original_url) as attempts\n" + + " on attempts.original_url=pu.url\n" + + " left anti join (\n" + + " select a.original_url from " + DatabaseConnector.databaseName + ".assignment a\n" + " union all\n" + - " select pl.id, pl.original_url from " + DatabaseConnector.databaseName + ".payload pl\n" + // Here we access the payload-VIEW which includes the three payload-tables. - " ) as existing\n" + - " on existing.id=p.id and existing.original_url=pu.url\n" + - " where d.allow_harvest=true and existing.id is null\n" + // For records not found on existing, the "existing.id" will be null. + " select pl.original_url from " + DatabaseConnector.databaseName + ".payload pl\n" + // Here we access the payload-VIEW which includes the three payload-tables. + " ) as existing\n" + + " on existing.original_url=pu.url\n" + + " where d.allow_harvest=true\n" + ((excludedDatasourceIDsStringList != null) ? // If we have an exclusion-list, use it below. (" and d.id not in " + excludedDatasourceIDsStringList + "\n") : "") + " and coalesce(attempts.counts, 0) <= " + maxAttemptsPerRecordAtomic.get() + "\n" +