From acef8911677b938512977e283dbf7040a190fb3c Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Mon, 4 Sep 2023 15:34:37 +0300 Subject: [PATCH] Improve prioritization of "publication_boost" records, by adding a second ordering at the end. --- .../openaire/urls_controller/services/UrlsServiceImpl.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java b/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java index 981b862..20692d0 100644 --- a/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java +++ b/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java @@ -111,7 +111,7 @@ public class UrlsServiceImpl implements UrlsService { // Create the Assignments from the id-urls stored in the database up to the < assignmentsLimit >. String findAssignmentsQuery = "select pubid, url, datasourceid, datasourcename\n" + // Select the final sorted data with "assignmentsLimit". - "from (select distinct pubid, url, datasourceid, datasourcename, attempt_count, pub_year\n" + // Select the distinct id-url data. Beware that this will return duplicate id-url paris, wince one pair may be associated with multiple datasources. + "from (select distinct pubid, url, datasourceid, datasourcename, level, pub_year, attempt_count\n" + // Select the distinct id-url data. Beware that this will return duplicate id-url pairs, since one pair may be associated with multiple datasources.
" from (select p.id as pubid, pu.url as url, pb.level as level, attempts.counts as attempt_count, p.year as pub_year, d.id as datasourceid, d.name as datasourcename\n" + // Select all needed columns frm JOINs, order by "boost.level" and limit them to (assignmentsLimit * 10) " from " + DatabaseConnector.databaseName + ".publication p\n" + " join " + DatabaseConnector.databaseName + ".publication_urls pu on pu.id=p.id\n" + @@ -132,10 +132,10 @@ public class UrlsServiceImpl implements UrlsService { " and coalesce(attempts.counts, 0) <= " + maxAttemptsPerRecordAtomic.get() + "\n" + " and not exists (select 1 from " + DatabaseConnector.databaseName + ".attempt a where a.id=p.id and a.error_class = 'noRetry' limit 1)\n" + " and pu.url != '' and pu.url is not null\n" + // Some IDs have empty-string urls, there are no "null" urls, but keep the relevant check for future-proofing. - " order by coalesce(level, -1000) desc\n" + + " order by coalesce(level, 0) desc\n" + " limit " + (assignmentsLimit * 10) + "\n" + " ) as non_distinct_results\n" + - " order by coalesce(attempt_count, 0), coalesce(pub_year, 0) desc, reverse(pubid), url\n" + // We also order by reverse "pubid" and "url", in order to get the exactly same records for consecutive runs, all things being equal. + " order by coalesce(level, 0) desc, coalesce(pub_year, 0) desc, coalesce(attempt_count, 0), reverse(pubid), url\n" + // We also order by reverse "pubid" and "url", in order to get the exactly same records for consecutive runs, all things being equal. " limit " + assignmentsLimit + "\n" + ") as findAssignmentsQuery";