From 0c79fdea35d826cbb15d13ed1e71a857b689456e Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Fri, 6 Oct 2023 14:59:26 +0300 Subject: [PATCH] Update the "findAssignmentsQuery" to check the "attempt.error_class" field for the current pub_url, not the pub_id. --- .../openaire/urls_controller/services/UrlsServiceImpl.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java b/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java index 70c011a..5d2041b 100644 --- a/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java +++ b/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java @@ -121,17 +121,16 @@ public class UrlsServiceImpl implements UrlsService { " on p.id=pb.id\n" + " left outer join (select count(at.original_url) as counts, at.original_url from " + DatabaseConnector.databaseName + ".attempt at group by at.original_url) as attempts\n" + " on attempts.original_url=pu.url\n" + - " left anti join (\n" + - " select a.original_url from " + DatabaseConnector.databaseName + ".assignment a\n" + + " left anti join (select a.original_url from " + DatabaseConnector.databaseName + ".assignment a\n" + " union all\n" + " select pl.original_url from " + DatabaseConnector.databaseName + ".payload pl\n" + // Here we access the payload-VIEW which includes the three payload-tables. - " ) as existing\n" + + " ) as existing\n" + " on existing.original_url=pu.url\n" + " where d.allow_harvest=true\n" + ((excludedDatasourceIDsStringList != null) ? // If we have an exclusion-list, use it below. (" and d.id not in " + excludedDatasourceIDsStringList + GenericUtils.endOfLine) : "") + " and coalesce(attempts.counts, 0) <= " + maxAttemptsPerRecordAtomic.get() + GenericUtils.endOfLine + - " and not exists (select 1 from " + DatabaseConnector.databaseName + ".attempt a where a.id=p.id and a.error_class = 'noRetry' limit 1)\n" + + " and not exists (select 1 from " + DatabaseConnector.databaseName + ".attempt a where a.original_url=pu.url and a.error_class = 'noRetry' limit 1)\n" + " and pu.url != '' and pu.url is not null\n" + // Some IDs have empty-string urls, there are no "null" urls, but keep the relevant check for future-proofing. " and (p.year <= " + currentYear + " or p.year > " + (currentYear + 5) + ")\n" + // Exclude the pubs which will be published in the next 5 years. They don't provide full-texts now. (We don't exclude all future pubs, since, some have invalid year, like "9999"). ") as distinct_results\n" +