- Optimize the "DOC_URL_FILTER" regex by using a non-capturing group.

- Remove an extra "File.separator" from the full-texts' "fileFullPath".
This commit is contained in:
Lampros Smyrnaios 2024-01-19 15:46:23 +02:00
parent bdc61c2cda
commit 5dadb8ad2f
3 changed files with 4 additions and 4 deletions

View File

@ -6,7 +6,7 @@ plugins {
java { java {
group = 'eu.openaire.urls_controller' group = 'eu.openaire.urls_controller'
version = '2.7.0-SNAPSHOT' version = '2.6.3-SNAPSHOT'
sourceCompatibility = JavaVersion.VERSION_1_8 sourceCompatibility = JavaVersion.VERSION_1_8
} }

View File

@ -68,7 +68,7 @@ public class UrlsServiceImpl implements UrlsService {
private static String excludedDatasourceIDsStringList = null; private static String excludedDatasourceIDsStringList = null;
private static final String DOC_URL_FILTER = ".+(pdf|download|/doc|document|(?:/|[?]|&)file|/fulltext|attachment|/paper|viewfile|viewdoc|/get|cgi/viewcontent.cgi\\?|t[ée]l[ée]charger|descargar).*"; private static final String DOC_URL_FILTER = ".+(?:pdf|download|/doc|document|(?:/|[?]|&)file|/fulltext|attachment|/paper|viewfile|viewdoc|/get|cgi/viewcontent.cgi\\?|t[ée]l[ée]charger|descargar).*";
// "DOC_URL_FILTER" works for lowerCase Strings (we use the "ignore-case" indicator in the "regexp_like()" method). // "DOC_URL_FILTER" works for lowerCase Strings (we use the "ignore-case" indicator in the "regexp_like()" method).
@ -329,7 +329,7 @@ public class UrlsServiceImpl implements UrlsService {
DatabaseConnector.databaseLock.lock(); DatabaseConnector.databaseLock.lock();
// Lock the DB here so the prefilled-Payloads which will be generated inside the "getTasksForCreatingAndUploadingParquetFiles()" method (using a dedicated query) // Lock the DB here so the prefilled-Payloads which will be generated inside the "getTasksForCreatingAndUploadingParquetFiles()" method (using a dedicated query)
// will be synchronized with the insert of all attempt and payload records to the DB. // will be synchronized with the insert of all attempt and payload records to the DB. This action is NOT a callable-task, so it runs during the execution of this method.
// This is important in order to avoid having workers take these records as assignments, when we know that payloads are ready to be inserted for them. // This is important in order to avoid having workers take these records as assignments, when we know that payloads are ready to be inserted for them.
List<Callable<ParquetReport>> callableTasks = parquetFileUtils.getTasksForCreatingAndUploadingParquetFiles(urlReports, sizeOfUrlReports, curReportAssignmentsCounter, localParquetPath, uploadFullTextsResponse); List<Callable<ParquetReport>> callableTasks = parquetFileUtils.getTasksForCreatingAndUploadingParquetFiles(urlReports, sizeOfUrlReports, curReportAssignmentsCounter, localParquetPath, uploadFullTextsResponse);

View File

@ -598,7 +598,7 @@ public class FileUtils {
if ( filenameForS3 == null ) // The error is logged inside. if ( filenameForS3 == null ) // The error is logged inside.
return null; return null;
String fileFullPath = targetDirectory + File.separator + fileName; // The fullPath to the local file (which has the previous name). String fileFullPath = targetDirectory + fileName; // The fullPath to the local file (which has the previous name).
String s3Url = null; String s3Url = null;
try { try {
s3Url = s3ObjectStore.uploadToS3(filenameForS3, fileFullPath); s3Url = s3ObjectStore.uploadToS3(filenameForS3, fileFullPath);