forked from lsmyrnaios/UrlsController
- Optimize the "DOC_URL_FILTER" regex by using a non-capturing group.
- Remove an extra "File.separator" from the full path of the full-text files ("fileFullPath").
This commit is contained in:
parent
bdc61c2cda
commit
5dadb8ad2f
|
@ -6,7 +6,7 @@ plugins {
|
||||||
|
|
||||||
java {
|
java {
|
||||||
group = 'eu.openaire.urls_controller'
|
group = 'eu.openaire.urls_controller'
|
||||||
version = '2.7.0-SNAPSHOT'
|
version = '2.6.3-SNAPSHOT'
|
||||||
sourceCompatibility = JavaVersion.VERSION_1_8
|
sourceCompatibility = JavaVersion.VERSION_1_8
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -68,7 +68,7 @@ public class UrlsServiceImpl implements UrlsService {
|
||||||
private static String excludedDatasourceIDsStringList = null;
|
private static String excludedDatasourceIDsStringList = null;
|
||||||
|
|
||||||
|
|
||||||
private static final String DOC_URL_FILTER = ".+(pdf|download|/doc|document|(?:/|[?]|&)file|/fulltext|attachment|/paper|viewfile|viewdoc|/get|cgi/viewcontent.cgi\\?|t[ée]l[ée]charger|descargar).*";
|
private static final String DOC_URL_FILTER = ".+(?:pdf|download|/doc|document|(?:/|[?]|&)file|/fulltext|attachment|/paper|viewfile|viewdoc|/get|cgi/viewcontent.cgi\\?|t[ée]l[ée]charger|descargar).*";
|
||||||
// "DOC_URL_FILTER" works for lowerCase Strings (we use the "ignore-case" indicator in the "regexp_like()" method).
|
// "DOC_URL_FILTER" works for lowerCase Strings (we use the "ignore-case" indicator in the "regexp_like()" method).
|
||||||
|
|
||||||
|
|
||||||
|
@ -329,7 +329,7 @@ public class UrlsServiceImpl implements UrlsService {
|
||||||
|
|
||||||
DatabaseConnector.databaseLock.lock();
|
DatabaseConnector.databaseLock.lock();
|
||||||
// Lock the DB here so the prefilled-Payloads which will be generated inside the "getTasksForCreatingAndUploadingParquetFiles()" method (using a dedicated query)
|
// Lock the DB here so the prefilled-Payloads which will be generated inside the "getTasksForCreatingAndUploadingParquetFiles()" method (using a dedicated query)
|
||||||
// will be synchronized with the insert of all attempt and payload records to the DB.
|
// will be synchronized with the insert of all attempt and payload records to the DB. This action is NOT a callable-task, so it runs during the execution of this method.
|
||||||
// This is important in order to avoid having workers take these records as assignments, when we know that payloads are ready to be inserted for them.
|
// This is important in order to avoid having workers take these records as assignments, when we know that payloads are ready to be inserted for them.
|
||||||
|
|
||||||
List<Callable<ParquetReport>> callableTasks = parquetFileUtils.getTasksForCreatingAndUploadingParquetFiles(urlReports, sizeOfUrlReports, curReportAssignmentsCounter, localParquetPath, uploadFullTextsResponse);
|
List<Callable<ParquetReport>> callableTasks = parquetFileUtils.getTasksForCreatingAndUploadingParquetFiles(urlReports, sizeOfUrlReports, curReportAssignmentsCounter, localParquetPath, uploadFullTextsResponse);
|
||||||
|
|
|
@ -598,7 +598,7 @@ public class FileUtils {
|
||||||
if ( filenameForS3 == null ) // The error is logged inside.
|
if ( filenameForS3 == null ) // The error is logged inside.
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
String fileFullPath = targetDirectory + File.separator + fileName; // The fullPath to the local file (which has the previous name).
|
String fileFullPath = targetDirectory + fileName; // The fullPath to the local file (which has the previous name).
|
||||||
String s3Url = null;
|
String s3Url = null;
|
||||||
try {
|
try {
|
||||||
s3Url = s3ObjectStore.uploadToS3(filenameForS3, fileFullPath);
|
s3Url = s3ObjectStore.uploadToS3(filenameForS3, fileFullPath);
|
||||||
|
|
Loading…
Reference in New Issue