From 9e9f417f1fbd68701d78e7fde9d09a6cc5a2e458 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Thu, 27 Jun 2024 23:10:46 +0300 Subject: [PATCH] - Remove the unused "accessmode" column from the results returned by the "findAssignmentsQuery". - Update dependencies. - Code polishing. --- build.gradle | 4 ++-- .../openaire/urls_controller/services/StatsServiceImpl.java | 1 + .../openaire/urls_controller/services/UrlsServiceImpl.java | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/build.gradle b/build.gradle index de71530..5d53800 100644 --- a/build.gradle +++ b/build.gradle @@ -54,7 +54,7 @@ dependencies { } implementation 'com.github.luben:zstd-jni:1.5.6-3' // Even though this is part of the above dependency, the Apache commons rarely updates it, while the zstd team makes improvements very often. - implementation 'io.minio:minio:8.5.10' + implementation 'io.minio:minio:8.5.11' // https://mvnrepository.com/artifact/com.cloudera.impala/jdbc implementation("com.cloudera.impala:jdbc:2.5.31") { @@ -110,7 +110,7 @@ dependencies { // Add back some updated version of the needed dependencies. implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions (>=0.18.X) are not compatible with JAVA 8. 
- implementation 'com.fasterxml.woodstox:woodstox-core:6.6.2' + implementation 'com.fasterxml.woodstox:woodstox-core:7.0.0' // https://mvnrepository.com/artifact/com.google.code.gson/gson implementation 'com.google.code.gson:gson:2.11.0' diff --git a/src/main/java/eu/openaire/urls_controller/services/StatsServiceImpl.java b/src/main/java/eu/openaire/urls_controller/services/StatsServiceImpl.java index f12eff4..1173ebe 100644 --- a/src/main/java/eu/openaire/urls_controller/services/StatsServiceImpl.java +++ b/src/main/java/eu/openaire/urls_controller/services/StatsServiceImpl.java @@ -70,6 +70,7 @@ public class StatsServiceImpl implements StatsService { // Before all the records are inspected, this endpoint will report all the inspected records MINUS the duplicate records which come straight from the "publication" table. final String getInspectedRecordsNumberQuery = "select count(dist.id) from (select distinct id, original_url from " + DatabaseConnector.databaseName + ".attempt) as dist"; + // An ID related to multiple urls is counted as many times as the number of distinct urls associated with it. try { Object result = jdbcTemplate.queryForObject(getInspectedRecordsNumberQuery, Integer.class); if ( result != null ) { diff --git a/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java b/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java index da6f08a..ceaba48 100644 --- a/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java +++ b/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java @@ -119,8 +119,8 @@ public class UrlsServiceImpl implements UrlsService { // Create the Assignments from the id-urls stored in the database up to the < assignmentsLimit >. String findAssignmentsQuery = - "select pubid, url, datasourceid, datasourcename, accessmode\n" + // Select the final sorted data with "assignmentsLimit". 
- "from (select distinct p.id as pubid, pu.url as url, pb.level as level, attempts.counts as attempt_count, p.year as pub_year, d.id as datasourceid, d.name as datasourcename, p.accessmode\n" + // Select the distinct id-url data. Beware that this will return duplicate id-url pairs, wince one pair may be associated with multiple datasources. + "select pubid, url, datasourceid, datasourcename\n" + // Select the final sorted data with "assignmentsLimit". + "from (select distinct p.id as pubid, pu.url as url, d.id as datasourceid, d.name as datasourcename, attempts.counts as attempt_count, pb.level as level, p.accessmode, p.year as pub_year\n" + // Select the distinct id-url data. Beware that this will return duplicate id-url pairs, since one pair may be associated with multiple datasources. " from " + DatabaseConnector.databaseName + ".publication_urls pu\n" + " join " + DatabaseConnector.databaseName + ".publication p on p.id=pu.id\n" + " join " + DatabaseConnector.databaseName + ".datasource d on d.id=p.datasourceid and d.allow_harvest=true"+ @@ -129,6 +129,7 @@ public class UrlsServiceImpl implements UrlsService { " left anti join (select a.original_url from " + DatabaseConnector.databaseName + ".assignment a\n" + " union all\n" + " select pl.original_url from " + DatabaseConnector.databaseName + ".payload pl\n" + // Here we access the payload-VIEW which includes the three payload-tables. + // todo - should we check the "actual-url" here as well?? " ) as existing\n" + " on existing.original_url=pu.url\n" + " left outer join " + DatabaseConnector.databaseName + ".publication_boost pb\n" + @@ -182,7 +183,6 @@ public class UrlsServiceImpl implements UrlsService { assignment.setOriginalUrl(rs.getString(2)); datasource.setId(rs.getString(3)); datasource.setName(rs.getString(4)); - // The 5th column is the "accessmode" which we do not need after the prioritization takes place in the query. 
} catch (SQLException sqle) { logger.error("No value was able to be retrieved from one of the columns of row_" + rs.getRow(), sqle); }