forked from lsmyrnaios/UrlsController
Code polishing.
This commit is contained in:
parent
b7f6056032
commit
c8485d472e
|
@ -66,24 +66,24 @@ public class UrlsServiceImpl implements UrlsService {
|
|||
|
||||
String findAssignmentsQuery = "select pubid, url, datasourceid, datasourcetype\n" +
|
||||
"from (select distinct pubid, url, datasourceid, datasourcetype, attempt_count\n" +
|
||||
"from (\n" +
|
||||
"select p.id as pubid, pu.url as url, d.id as datasourceid, d.type as datasourcetype, attempts.counts as attempt_count\n" +
|
||||
"from " + ImpalaConnector.databaseName + ".publication p\n" +
|
||||
"join " + ImpalaConnector.databaseName + ".publication_urls pu on pu.id=p.id\n" +
|
||||
"join " + ImpalaConnector.databaseName + ".datasource d on d.id=p.datasourceid\n" +
|
||||
"left outer join (select count(a.id) as counts, a.id from " + ImpalaConnector.databaseName + ".attempt a group by a.id) as attempts\n" +
|
||||
"on attempts.id=p.id\n" +
|
||||
"left outer join (select a.id, a.original_url from " + ImpalaConnector.databaseName + ".assignment a\n" +
|
||||
"union all\n" +
|
||||
"select pl.id, pl.original_url from " + ImpalaConnector.databaseName + ".payload pl) as existing\n" +
|
||||
"on existing.id=p.id and existing.original_url=pu.url\n" +
|
||||
"where d.allow_harvest=true and existing.id is null and coalesce(attempts.counts, 0) <= " + maxAttemptsPerRecordAtomic.get() +
|
||||
"\nand not exists (select 1 from " + ImpalaConnector.databaseName + ".attempt a where a.id=p.id and a.error_class = 'noRetry' limit 1)\n" +
|
||||
"and pu.url != '' and pu.url is not null\n" + // Some IDs have empty-string urls, there are no "null" urls, but keep the relevant check for future-proofing.
|
||||
"limit " + (assignmentsLimit * 10) +
|
||||
")\nas non_distinct_results\n" +
|
||||
"order by coalesce(attempt_count, 0), reverse(pubid), url\n" +
|
||||
"limit " + assignmentsLimit +
|
||||
"from (\n" +
|
||||
"select p.id as pubid, pu.url as url, d.id as datasourceid, d.type as datasourcetype, attempts.counts as attempt_count\n" +
|
||||
"from " + ImpalaConnector.databaseName + ".publication p\n" +
|
||||
"join " + ImpalaConnector.databaseName + ".publication_urls pu on pu.id=p.id\n" +
|
||||
"join " + ImpalaConnector.databaseName + ".datasource d on d.id=p.datasourceid\n" +
|
||||
"left outer join (select count(a.id) as counts, a.id from " + ImpalaConnector.databaseName + ".attempt a group by a.id) as attempts\n" +
|
||||
"on attempts.id=p.id\n" +
|
||||
"left outer join (select a.id, a.original_url from " + ImpalaConnector.databaseName + ".assignment a\n" +
|
||||
"union all\n" +
|
||||
"select pl.id, pl.original_url from " + ImpalaConnector.databaseName + ".payload pl) as existing\n" +
|
||||
"on existing.id=p.id and existing.original_url=pu.url\n" +
|
||||
"where d.allow_harvest=true and existing.id is null and coalesce(attempts.counts, 0) <= " + maxAttemptsPerRecordAtomic.get() +
|
||||
"\nand not exists (select 1 from " + ImpalaConnector.databaseName + ".attempt a where a.id=p.id and a.error_class = 'noRetry' limit 1)\n" +
|
||||
"and pu.url != '' and pu.url is not null\n" + // Some IDs have empty-string urls, there are no "null" urls, but keep the relevant check for future-proofing.
|
||||
"limit " + (assignmentsLimit * 10) +
|
||||
")\nas non_distinct_results\n" +
|
||||
"order by coalesce(attempt_count, 0), reverse(pubid), url\n" +
|
||||
"limit " + assignmentsLimit +
|
||||
"\n) as findAssignmentsQuery";
|
||||
|
||||
// The "order by" in the end makes sure the older attempted records will be re-attempted after a long time.
|
||||
|
|
|
@ -180,12 +180,12 @@ public class FileUtils {
|
|||
// Extract the "fileNameWithExtension" to be added in the HashMultimap.
|
||||
Matcher matcher = FILENAME_ID_EXTENSION.matcher(fileLocation);
|
||||
if ( ! matcher.matches() ) {
|
||||
logger.error("Failed to match the \"fileLocation\": \"" + fileLocation + "\", using this regex: " + FILENAME_ID_EXTENSION);
|
||||
logger.error("Failed to match the \"fileLocation\": \"" + fileLocation + "\" of id: \"" + payload.getId() + "\", originalUrl: \"" + payload.getOriginal_url() + "\", using this regex: " + FILENAME_ID_EXTENSION);
|
||||
return null;
|
||||
}
|
||||
String fileNameWithExtension = matcher.group(1);
|
||||
if ( (fileNameWithExtension == null) || fileNameWithExtension.isEmpty() ) {
|
||||
logger.error("Failed to extract the \"fileNameWithExtension\" from \"" + fileLocation + "\".");
|
||||
logger.error("Failed to extract the \"fileNameWithExtension\" from \"fileLocation\": \"" + fileLocation + "\", of id: \"" + payload.getId() + "\", originalUrl: \"" + payload.getOriginal_url() + "\", using this regex: " + FILENAME_ID_EXTENSION);
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue