Update the "findAssignmentsQuery" to check the "attempt.error_class" field against the current publication URL (attempt.original_url = pu.url) instead of the publication ID (attempt.id = p.id).
This commit is contained in:
parent
ebf8896005
commit
0c79fdea35
|
@ -121,17 +121,16 @@ public class UrlsServiceImpl implements UrlsService {
|
|||
" on p.id=pb.id\n" +
|
||||
" left outer join (select count(at.original_url) as counts, at.original_url from " + DatabaseConnector.databaseName + ".attempt at group by at.original_url) as attempts\n" +
|
||||
" on attempts.original_url=pu.url\n" +
|
||||
" left anti join (\n" +
|
||||
" select a.original_url from " + DatabaseConnector.databaseName + ".assignment a\n" +
|
||||
" left anti join (select a.original_url from " + DatabaseConnector.databaseName + ".assignment a\n" +
|
||||
" union all\n" +
|
||||
" select pl.original_url from " + DatabaseConnector.databaseName + ".payload pl\n" + // Here we access the payload-VIEW which includes the three payload-tables.
|
||||
" ) as existing\n" +
|
||||
" ) as existing\n" +
|
||||
" on existing.original_url=pu.url\n" +
|
||||
" where d.allow_harvest=true\n" +
|
||||
((excludedDatasourceIDsStringList != null) ? // If we have an exclusion-list, use it below.
|
||||
(" and d.id not in " + excludedDatasourceIDsStringList + GenericUtils.endOfLine) : "") +
|
||||
" and coalesce(attempts.counts, 0) <= " + maxAttemptsPerRecordAtomic.get() + GenericUtils.endOfLine +
|
||||
" and not exists (select 1 from " + DatabaseConnector.databaseName + ".attempt a where a.id=p.id and a.error_class = 'noRetry' limit 1)\n" +
|
||||
" and not exists (select 1 from " + DatabaseConnector.databaseName + ".attempt a where a.original_url=pu.url and a.error_class = 'noRetry' limit 1)\n" +
|
||||
" and pu.url != '' and pu.url is not null\n" + // Some IDs have empty-string urls, there are no "null" urls, but keep the relevant check for future-proofing.
|
||||
" and (p.year <= " + currentYear + " or p.year > " + (currentYear + 5) + ")\n" + // Exclude the pubs which will be published in the next 5 years. They don't provide full-texts now. (We don't exclude all future pubs, since, some have invalid year, like "9999").
|
||||
") as distinct_results\n" +
|
||||
|
|
Loading…
Reference in New Issue