parent
4af298a52a
commit
38643c76a3
|
@ -103,7 +103,7 @@ dependencies {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add back some updated version of the needed dependencies.
|
// Add back some updated version of the needed dependencies.
|
||||||
implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions are not compatible with JAVA 8.
|
implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions (>=0.18.X) are not compatible with JAVA 8.
|
||||||
implementation 'com.fasterxml.woodstox:woodstox-core:6.5.0'
|
implementation 'com.fasterxml.woodstox:woodstox-core:6.5.0'
|
||||||
|
|
||||||
// https://mvnrepository.com/artifact/org.json/json
|
// https://mvnrepository.com/artifact/org.json/json
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
distributionBase=GRADLE_USER_HOME
|
distributionBase=GRADLE_USER_HOME
|
||||||
distributionPath=wrapper/dists
|
distributionPath=wrapper/dists
|
||||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.1-bin.zip
|
distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.2-bin.zip
|
||||||
networkTimeout=10000
|
networkTimeout=10000
|
||||||
zipStoreBase=GRADLE_USER_HOME
|
zipStoreBase=GRADLE_USER_HOME
|
||||||
zipStorePath=wrapper/dists
|
zipStorePath=wrapper/dists
|
||||||
|
|
|
@ -26,7 +26,7 @@ if [[ justInstall -eq 1 && shouldRunInDocker -eq 1 ]]; then
|
||||||
justInstall=0
|
justInstall=0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
gradleVersion="8.0.1"
|
gradleVersion="8.0.2"
|
||||||
|
|
||||||
if [[ justInstall -eq 0 ]]; then
|
if [[ justInstall -eq 0 ]]; then
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@ public class UrlController {
|
||||||
|
|
||||||
|
|
||||||
@GetMapping("")
|
@GetMapping("")
|
||||||
public ResponseEntity<?> getUrls(@RequestParam String workerId, @RequestParam int workerAssignmentsLimit) {
|
public ResponseEntity<?> getAssignments(@RequestParam String workerId, @RequestParam int workerAssignmentsLimit) {
|
||||||
|
|
||||||
// As the Impala-driver is buggy and struggles to support parameterized queries in some types of prepared-statements, we have to sanitize the "workerId" ourselves.
|
// As the Impala-driver is buggy and struggles to support parameterized queries in some types of prepared-statements, we have to sanitize the "workerId" ourselves.
|
||||||
if ( MALICIOUS_INPUT_STRING.matcher(workerId).matches() ) {
|
if ( MALICIOUS_INPUT_STRING.matcher(workerId).matches() ) {
|
||||||
|
@ -55,7 +55,7 @@ public class UrlController {
|
||||||
assignmentsLimit = assignmentLimit;
|
assignmentsLimit = assignmentLimit;
|
||||||
}
|
}
|
||||||
|
|
||||||
return urlsService.getUrls(workerId, assignmentsLimit);
|
return urlsService.getAssignments(workerId, assignmentsLimit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@ public class StatsService {
|
||||||
public ResponseEntity<?> getNumberOfRecordsInspected()
|
public ResponseEntity<?> getNumberOfRecordsInspected()
|
||||||
{
|
{
|
||||||
// Note that until all the records are inspected, the "attempt" table contains all the inspected records PLUS very few duplicates (id-url) which come from the publications-database.
|
// Note that until all the records are inspected, the "attempt" table contains all the inspected records PLUS very few duplicates (id-url) which come from the publications-database.
|
||||||
// After all the records are inspected, it contains duplicate records of more and more id-urls, as time goes one, since for every eligible record the Service re-attempts to get the full-text.
|
// After all the records are inspected, it contains duplicate records of more and more id-urls, as time goes on, since for every eligible record the Service re-attempts to get the full-text.
|
||||||
// So in order to get the number of inspected records, we want the distinct number, which at some point it will remain stable, even though the Service will try again and again some records.
|
// So in order to get the number of inspected records, we want the distinct number, which at some point it will remain stable, even though the Service will try again and again some records.
|
||||||
// Before all the records are inspected, this endpoint will report all the inspected records MINUS the duplicate records which come straight from the "publication" table.
|
// Before all the records are inspected, this endpoint will report all the inspected records MINUS the duplicate records which come straight from the "publication" table.
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ import java.util.List;
|
||||||
|
|
||||||
public interface UrlsService {
|
public interface UrlsService {
|
||||||
|
|
||||||
ResponseEntity<?> getUrls(String workerId, int assignmentsLimit);
|
ResponseEntity<?> getAssignments(String workerId, int assignmentsLimit);
|
||||||
|
|
||||||
ResponseEntity<?> addWorkerReport(String curWorkerId, long curReportAssignments, List<UrlReport> urlReports, int sizeOfUrlReports, HttpServletRequest request);
|
ResponseEntity<?> addWorkerReport(String curWorkerId, long curReportAssignments, List<UrlReport> urlReports, int sizeOfUrlReports, HttpServletRequest request);
|
||||||
|
|
||||||
|
|
|
@ -60,7 +60,7 @@ public class UrlsServiceImpl implements UrlsService {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public ResponseEntity<?> getUrls(String workerId, int assignmentsLimit)
|
public ResponseEntity<?> getAssignments(String workerId, int assignmentsLimit)
|
||||||
{
|
{
|
||||||
// Create the Assignments from the id-urls stored in the database up to the < assignmentsLimit >.
|
// Create the Assignments from the id-urls stored in the database up to the < assignmentsLimit >.
|
||||||
|
|
||||||
|
|
|
@ -111,7 +111,7 @@ public class FileUtils {
|
||||||
// Possible full-filenames are: "path1/path2/ID.pdf", "ID2.pdf", "path1/path2/ID(12).pdf", "ID2(25).pdf"
|
// Possible full-filenames are: "path1/path2/ID.pdf", "ID2.pdf", "path1/path2/ID(12).pdf", "ID2(25).pdf"
|
||||||
private static final Pattern FILENAME_ID_EXTENSION = Pattern.compile("(?:[^.()]+/)?((([^./()]+)[^./]*)(\\.[\\w]{2,10}))$");
|
private static final Pattern FILENAME_ID_EXTENSION = Pattern.compile("(?:[^.()]+/)?((([^./()]+)[^./]*)(\\.[\\w]{2,10}))$");
|
||||||
|
|
||||||
private final int numOfFullTextsPerBatch = 70; // The HTTP-headers cannot be too large (It failed with 100 fileNames).
|
private static final int numOfFullTextsPerBatch = 70; // The HTTP-headers cannot be too large (It failed with 100 fileNames).
|
||||||
|
|
||||||
public static final ExecutorService hashMatchingExecutor = Executors.newFixedThreadPool(6);
|
public static final ExecutorService hashMatchingExecutor = Executors.newFixedThreadPool(6);
|
||||||
|
|
||||||
|
@ -484,7 +484,7 @@ public class FileUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static final int tenMb = 10 * 1_048_576;
|
public static final int tenMb = (10 * 1_048_576);
|
||||||
|
|
||||||
public boolean saveArchive(HttpURLConnection conn, File zstdFile)
|
public boolean saveArchive(HttpURLConnection conn, File zstdFile)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue