forked from lsmyrnaios/UrlsController
- Improve prioritization of the most recent publications.
- Avoid processing publications whose publication year falls within the next 5 years, counting from the current year at the time of each request, since they do not provide full-texts yet. Still allow publications with invalid publication years, like "2566", "9999", etc.
This commit is contained in:
parent
4014d1eabb
commit
718f5cfefb
|
@ -28,9 +28,7 @@ import java.sql.Connection;
|
||||||
import java.sql.PreparedStatement;
|
import java.sql.PreparedStatement;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.sql.Timestamp;
|
import java.sql.Timestamp;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
|
@ -108,6 +106,8 @@ public class UrlsServiceImpl implements UrlsService {
|
||||||
@Timed(value = "getAssignments.time", description = "Time taken to return the assignments.")
|
@Timed(value = "getAssignments.time", description = "Time taken to return the assignments.")
|
||||||
public ResponseEntity<?> getAssignments(String workerId, int assignmentsLimit)
|
public ResponseEntity<?> getAssignments(String workerId, int assignmentsLimit)
|
||||||
{
|
{
|
||||||
|
int currentYear = Calendar.getInstance().get(Calendar.YEAR);
|
||||||
|
|
||||||
// Create the Assignments from the id-urls stored in the database up to the < assignmentsLimit >.
|
// Create the Assignments from the id-urls stored in the database up to the < assignmentsLimit >.
|
||||||
String findAssignmentsQuery =
|
String findAssignmentsQuery =
|
||||||
"select pubid, url, datasourceid, datasourcename\n" + // Select the final sorted data with "assignmentsLimit".
|
"select pubid, url, datasourceid, datasourcename\n" + // Select the final sorted data with "assignmentsLimit".
|
||||||
|
@ -132,7 +132,8 @@ public class UrlsServiceImpl implements UrlsService {
|
||||||
" and coalesce(attempts.counts, 0) <= " + maxAttemptsPerRecordAtomic.get() + "\n" +
|
" and coalesce(attempts.counts, 0) <= " + maxAttemptsPerRecordAtomic.get() + "\n" +
|
||||||
" and not exists (select 1 from " + DatabaseConnector.databaseName + ".attempt a where a.id=p.id and a.error_class = 'noRetry' limit 1)\n" +
|
" and not exists (select 1 from " + DatabaseConnector.databaseName + ".attempt a where a.id=p.id and a.error_class = 'noRetry' limit 1)\n" +
|
||||||
" and pu.url != '' and pu.url is not null\n" + // Some IDs have empty-string urls, there are no "null" urls, but keep the relevant check for future-proofing.
|
" and pu.url != '' and pu.url is not null\n" + // Some IDs have empty-string urls, there are no "null" urls, but keep the relevant check for future-proofing.
|
||||||
" order by coalesce(level, 0) desc\n" +
|
" and (p.year <= " + currentYear + " or p.year > " + (currentYear + 5) + ")\n" + // Exclude the pubs which will be published in the next 5 years. They don't provide full-texts now. (We don't exclude all future pubs, since, some have invalid year, like "9999").
|
||||||
|
" order by coalesce(level, 0) desc, coalesce(pub_year, 0) desc\n" +
|
||||||
" limit " + (assignmentsLimit * 10) + "\n" +
|
" limit " + (assignmentsLimit * 10) + "\n" +
|
||||||
" ) as non_distinct_results\n" +
|
" ) as non_distinct_results\n" +
|
||||||
" order by coalesce(level, 0) desc, coalesce(pub_year, 0) desc, coalesce(attempt_count, 0), reverse(pubid), url\n" + // We also order by reverse "pubid" and "url", in order to get the exactly same records for consecutive runs, all things being equal.
|
" order by coalesce(level, 0) desc, coalesce(pub_year, 0) desc, coalesce(attempt_count, 0), reverse(pubid), url\n" + // We also order by reverse "pubid" and "url", in order to get the exactly same records for consecutive runs, all things being equal.
|
||||||
|
|
Loading…
Reference in New Issue