- Improve error-handling
- Use the new class-model from the PublicationsRetriever software.
- Set new version.
This commit is contained in:
parent
83e6d761dd
commit
b70ae3ed58
|
@ -6,7 +6,7 @@ plugins {
|
|||
|
||||
java {
|
||||
group = 'eu.openaire.urls_worker'
|
||||
version = '2.1.15-SNAPSHOT'
|
||||
version = '2.1.15'
|
||||
sourceCompatibility = JavaVersion.VERSION_1_8
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ import org.springframework.boot.web.client.RestTemplateBuilder;
|
|||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.web.client.HttpClientErrorException;
|
||||
import org.springframework.web.client.HttpServerErrorException;
|
||||
import org.springframework.web.client.RestClientException;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
@ -260,6 +261,9 @@ public class AssignmentsHandler {
|
|||
} catch (HttpServerErrorException hsee) {
|
||||
logger.error("The Controller failed to handle the WorkerReport of assignments_" + assignmentRequestCounter + ": " + hsee.getMessage());
|
||||
return false;
|
||||
} catch (HttpClientErrorException hcee) {
|
||||
logger.error("Worker error when submitting the WorkerReport of assignments_" + assignmentRequestCounter + " to the Controller: " + hcee.getMessage());
|
||||
return false;
|
||||
} catch (Exception e) {
|
||||
logger.error("Error when submitting the WorkerReport of assignments_" + assignmentRequestCounter + " to the Controller: ", e);
|
||||
return false;
|
||||
|
|
|
@ -161,7 +161,7 @@ public class ScheduledTasks {
|
|||
}
|
||||
usableDirsNum = fulltextSubDirs.length;
|
||||
if ( usableDirsNum == 0 ) {
|
||||
logger.debug("The \"fullTextsBaseDir\" is empty, so there is nothing to delete.");
|
||||
logger.debug("The \"fullTextsBaseDir\" is empty, so there is nothing to check and delete.");
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -188,7 +188,7 @@ public class ScheduledTasks {
|
|||
String assingmentsCounterString = matcher.group(1);
|
||||
if ( (assingmentsCounterString != null) && !assingmentsCounterString.isEmpty()) {
|
||||
if ( FullTextsController.deleteFile(this.workerReportsDirPath + this.workerId + "_assignments_" + assingmentsCounterString + "_report.json") )
|
||||
logger.warn("The subDir \"" + subDirName + "\" probably contains some failed files, since the workerReport for assignments_" + assingmentsCounterString + " was deleted only now, which means the Controller failed to successfully process the results of those assignments.");
|
||||
logger.warn("The subDir \"" + subDirName + "\" probably contains some failed files, since the workerReport for assignments_" + assingmentsCounterString + " was deleted just now, which means the Controller failed to successfully process the results of those assignments.");
|
||||
} else
|
||||
logger.error("The subDir \"" + subDirName + "\" has an invalid name! It does not contains the assignmentsCounter!");
|
||||
} else
|
||||
|
|
|
@ -47,8 +47,9 @@ public class PublicationsRetrieverPlugin {
|
|||
LoaderAndChecker.retrieveDocuments = true;
|
||||
LoaderAndChecker.retrieveDatasets = false;
|
||||
ConnSupportUtils.setKnownMimeTypes();
|
||||
FileUtils.shouldDownloadDocFiles = true;
|
||||
FileUtils.docFileNameType = FileUtils.DocFileNameType.idName;
|
||||
|
||||
ArgsUtils.shouldDownloadDocFiles = true;
|
||||
ArgsUtils.fileNameType = ArgsUtils.fileNameTypeEnum.idName;
|
||||
ArgsUtils.targetUrlType = "docUrl";
|
||||
FileUtils.jsonBatchSize = maxAssignmentsLimitPerBatch;
|
||||
|
||||
|
@ -56,7 +57,7 @@ public class PublicationsRetrieverPlugin {
|
|||
|
||||
ConnSupportUtils.shouldBlockMost5XXDomains = false; // If this is "true", all but the "503" will be blocked. Otherwise, only the "511" will be blocked.
|
||||
LoaderAndChecker.setCouldRetryRegex();
|
||||
UrlTypeChecker.setURLDirectoryFilterRegex();
|
||||
UrlTypeChecker.setRuntimeInitializedRegexes();
|
||||
|
||||
cookieStore = HttpConnUtils.cookieManager.getCookieStore();
|
||||
|
||||
|
@ -79,13 +80,13 @@ public class PublicationsRetrieverPlugin {
|
|||
// At this point, the "assignmentsBasePath"-directory has already been successfully created.
|
||||
|
||||
String currentAssignmentsSubDir = "assignments_" + assignmentRequestCounter + "_fullTexts" + File.separator;
|
||||
FileUtils.storeDocFilesDir = assignmentsBasePath + currentAssignmentsSubDir; // It needs the last separator, because of how the docFiles are named and stored.
|
||||
ArgsUtils.storeDocFilesDir = assignmentsBasePath + currentAssignmentsSubDir; // It needs the last separator, because of how the docFiles are named and stored.
|
||||
|
||||
File curAssignmentsDirs = new File(FileUtils.storeDocFilesDir);
|
||||
File curAssignmentsDirs = new File(ArgsUtils.storeDocFilesDir);
|
||||
try {
|
||||
if ( !curAssignmentsDirs.exists() ) {
|
||||
if ( !curAssignmentsDirs.mkdirs() ) // Try to create the directory(-ies) if they don't exist. If they exist OR if sth went wrong, the result is the same: "false".
|
||||
throw new RuntimeException("Could not create the \"" + currentAssignmentsSubDir + "\" directories: \"" + FileUtils.storeDocFilesDir + "\"!");
|
||||
throw new RuntimeException("Could not create the \"" + currentAssignmentsSubDir + "\" directories: \"" + ArgsUtils.storeDocFilesDir + "\"!");
|
||||
} else
|
||||
logger.warn("The curAssignmentsDirs: \"" + currentAssignmentsSubDir + "\" already exist! Probably left behind by a previous execution..");
|
||||
} catch (Exception e) { // Mainly a SecurityException.
|
||||
|
@ -167,7 +168,7 @@ public class PublicationsRetrieverPlugin {
|
|||
callableTasks.clear(); // Reset the thread-tasks-list for the next batch.
|
||||
ConnSupportUtils.domainsWithConnectionData.clear(); // This data is not useful for the next batch, since plenty of time will have passed before needing to check the "lastConnectedTime" for each domain, in order to apply the "politenessDelay".
|
||||
|
||||
addUrlReportsToWorkerReport(assignments);
|
||||
addUrlReportsToWorkerReport(assignments, assignmentRequestCounter);
|
||||
|
||||
UrlUtils.docOrDatasetUrlsWithIDs.clear(); // This HashTable is useful only for a single assignments-batch.
|
||||
// In the next batch, the previously stored files might have been already delivered to the Controller and deleted by the worker. Also, they will be stored in a different directory anyway.
|
||||
|
@ -185,11 +186,17 @@ public class PublicationsRetrieverPlugin {
|
|||
private static final String provenance = "crawl:PublicationsRetriever";
|
||||
|
||||
|
||||
public static void addUrlReportsToWorkerReport(Collection<Assignment> assignments)
|
||||
public static void addUrlReportsToWorkerReport(Collection<Assignment> assignments, long assignmentRequestCounter)
|
||||
{
|
||||
if ( FileUtils.dataForOutput.size() != assignments.size() ) {
|
||||
logger.warn("The number of the results (" + FileUtils.dataForOutput.size() + ") is different from the number of the given assignments (" + assignments.size() + ")!");
|
||||
int numOfResults = FileUtils.dataForOutput.size();
|
||||
if ( numOfResults == 0 ) {
|
||||
logger.warn("No results were produced while processing assignments_" + assignmentRequestCounter);
|
||||
return; // Just return and send a WorkerReport with an empty "urlsReports"-list to the Controller, in order to notify it that something went wrong with this batch.
|
||||
}
|
||||
else if ( numOfResults != assignments.size() ) {
|
||||
logger.warn("The number of the results (" + numOfResults + ") is different from the number of the given assignments (" + assignments.size() + ")!");
|
||||
} // TODO - Should any other step be taken, except from just showing the log-message?
|
||||
// In any of the above cases, a serious exception was thrown by the "PublicationsRetriever" code, before some or all of the "dataForOutput" entries were added to the list.
|
||||
|
||||
// Index the UrlIds with the DatasourceIds for quick-search later. The datasourceIds are not included in the "DataForOutput" objects.
|
||||
HashMap<String, String> urlIdsWithDatasourceIds = new HashMap<>(assignments.size());
|
||||
|
|
Loading…
Reference in New Issue