- Remove the unused "accessmode" column from the results returned by the "findAssignmentsQuery".
- Update dependencies.
- Code polishing.
This commit is contained in:
parent
e46743bfba
commit
9e9f417f1f
|
@ -54,7 +54,7 @@ dependencies {
|
||||||
}
|
}
|
||||||
implementation 'com.github.luben:zstd-jni:1.5.6-3' // Even though this is part of the above dependency, the Apache commons rarely updates it, while the zstd team makes improvements very often.
|
implementation 'com.github.luben:zstd-jni:1.5.6-3' // Even though this is part of the above dependency, the Apache commons rarely updates it, while the zstd team makes improvements very often.
|
||||||
|
|
||||||
implementation 'io.minio:minio:8.5.10'
|
implementation 'io.minio:minio:8.5.11'
|
||||||
|
|
||||||
// https://mvnrepository.com/artifact/com.cloudera.impala/jdbc
|
// https://mvnrepository.com/artifact/com.cloudera.impala/jdbc
|
||||||
implementation("com.cloudera.impala:jdbc:2.5.31") {
|
implementation("com.cloudera.impala:jdbc:2.5.31") {
|
||||||
|
@ -110,7 +110,7 @@ dependencies {
|
||||||
|
|
||||||
// Add back some updated version of the needed dependencies.
|
// Add back some updated version of the needed dependencies.
|
||||||
implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions (>=0.18.X) are not compatible with JAVA 8.
|
implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions (>=0.18.X) are not compatible with JAVA 8.
|
||||||
implementation 'com.fasterxml.woodstox:woodstox-core:6.6.2'
|
implementation 'com.fasterxml.woodstox:woodstox-core:7.0.0'
|
||||||
|
|
||||||
// https://mvnrepository.com/artifact/com.google.code.gson/gson
|
// https://mvnrepository.com/artifact/com.google.code.gson/gson
|
||||||
implementation 'com.google.code.gson:gson:2.11.0'
|
implementation 'com.google.code.gson:gson:2.11.0'
|
||||||
|
|
|
@ -70,6 +70,7 @@ public class StatsServiceImpl implements StatsService {
|
||||||
// Before all the records are inspected, this endpoint will report all the inspected records MINUS the duplicate records which come straight from the "publication" table.
|
// Before all the records are inspected, this endpoint will report all the inspected records MINUS the duplicate records which come straight from the "publication" table.
|
||||||
|
|
||||||
final String getInspectedRecordsNumberQuery = "select count(dist.id) from (select distinct id, original_url from " + DatabaseConnector.databaseName + ".attempt) as dist";
|
final String getInspectedRecordsNumberQuery = "select count(dist.id) from (select distinct id, original_url from " + DatabaseConnector.databaseName + ".attempt) as dist";
|
||||||
|
// An ID related to multiple urls is counted as many times as the number of distinct urls associated with it.
|
||||||
try {
|
try {
|
||||||
Object result = jdbcTemplate.queryForObject(getInspectedRecordsNumberQuery, Integer.class);
|
Object result = jdbcTemplate.queryForObject(getInspectedRecordsNumberQuery, Integer.class);
|
||||||
if ( result != null ) {
|
if ( result != null ) {
|
||||||
|
|
|
@ -119,8 +119,8 @@ public class UrlsServiceImpl implements UrlsService {
|
||||||
|
|
||||||
// Create the Assignments from the id-urls stored in the database up to the < assignmentsLimit >.
|
// Create the Assignments from the id-urls stored in the database up to the < assignmentsLimit >.
|
||||||
String findAssignmentsQuery =
|
String findAssignmentsQuery =
|
||||||
"select pubid, url, datasourceid, datasourcename, accessmode\n" + // Select the final sorted data with "assignmentsLimit".
|
"select pubid, url, datasourceid, datasourcename\n" + // Select the final sorted data with "assignmentsLimit".
|
||||||
"from (select distinct p.id as pubid, pu.url as url, pb.level as level, attempts.counts as attempt_count, p.year as pub_year, d.id as datasourceid, d.name as datasourcename, p.accessmode\n" + // Select the distinct id-url data. Beware that this will return duplicate id-url pairs, since one pair may be associated with multiple datasources.
|
"from (select distinct p.id as pubid, pu.url as url, d.id as datasourceid, d.name as datasourcename, attempts.counts as attempt_count, pb.level as level, p.accessmode, p.year as pub_year\n" + // Select the distinct id-url data. Beware that this will return duplicate id-url pairs, since one pair may be associated with multiple datasources.
|
||||||
" from " + DatabaseConnector.databaseName + ".publication_urls pu\n" +
|
" from " + DatabaseConnector.databaseName + ".publication_urls pu\n" +
|
||||||
" join " + DatabaseConnector.databaseName + ".publication p on p.id=pu.id\n" +
|
" join " + DatabaseConnector.databaseName + ".publication p on p.id=pu.id\n" +
|
||||||
" join " + DatabaseConnector.databaseName + ".datasource d on d.id=p.datasourceid and d.allow_harvest=true"+
|
" join " + DatabaseConnector.databaseName + ".datasource d on d.id=p.datasourceid and d.allow_harvest=true"+
|
||||||
|
@ -129,6 +129,7 @@ public class UrlsServiceImpl implements UrlsService {
|
||||||
" left anti join (select a.original_url from " + DatabaseConnector.databaseName + ".assignment a\n" +
|
" left anti join (select a.original_url from " + DatabaseConnector.databaseName + ".assignment a\n" +
|
||||||
" union all\n" +
|
" union all\n" +
|
||||||
" select pl.original_url from " + DatabaseConnector.databaseName + ".payload pl\n" + // Here we access the payload-VIEW which includes the three payload-tables.
|
" select pl.original_url from " + DatabaseConnector.databaseName + ".payload pl\n" + // Here we access the payload-VIEW which includes the three payload-tables.
|
||||||
|
// todo - should we check the "actual-url" here as well??
|
||||||
" ) as existing\n" +
|
" ) as existing\n" +
|
||||||
" on existing.original_url=pu.url\n" +
|
" on existing.original_url=pu.url\n" +
|
||||||
" left outer join " + DatabaseConnector.databaseName + ".publication_boost pb\n" +
|
" left outer join " + DatabaseConnector.databaseName + ".publication_boost pb\n" +
|
||||||
|
@ -182,7 +183,6 @@ public class UrlsServiceImpl implements UrlsService {
|
||||||
assignment.setOriginalUrl(rs.getString(2));
|
assignment.setOriginalUrl(rs.getString(2));
|
||||||
datasource.setId(rs.getString(3));
|
datasource.setId(rs.getString(3));
|
||||||
datasource.setName(rs.getString(4));
|
datasource.setName(rs.getString(4));
|
||||||
// The 5th column is the "accessmode" which we do not need after the prioritization takes place in the query.
|
|
||||||
} catch (SQLException sqle) {
|
} catch (SQLException sqle) {
|
||||||
logger.error("No value was able to be retrieved from one of the columns of row_" + rs.getRow(), sqle);
|
logger.error("No value was able to be retrieved from one of the columns of row_" + rs.getRow(), sqle);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue