- Allow the user to set the "maxAssignmentsLimitPerBatch" value.

- Set increased lower and upper limits for the Java Heap Size.
- Update the "ServerBaseURL" to the Public IP Address of the machine which is running the app.
- Improve two log-messages.
This commit is contained in:
Lampros Smyrnaios 2021-12-07 00:52:40 +02:00
parent ce49bff50e
commit fd5b56e3c6
8 changed files with 73 additions and 20 deletions

View File

@ -48,6 +48,11 @@ configurations {
all*.exclude group: 'ch.qos.logback'
}
// Set increased lower and upper limits for the java-execution.
tasks.withType(JavaExec) {
jvmArgs = ['-Xms512m', '-Xmx8g']
}
test {
useJUnitPlatform()
}

View File

@ -17,11 +17,14 @@ if [[ ! -f $inputDataFile ]]; then
echo "Give the ID of this worker:"
read -r workerId
echo -e "\nGive the max-assignments-limit-per-batch for the Worker to handle: "
read -r maxAssignmentsLimitPerBatch
echo -e "\nGive the baseUrl of the controller (e.g.: http://IP:PORT/api/):"
read -r controllerBaseUrl
touch $inputDataFile
echo "$workerId,$controllerBaseUrl" >> $inputDataFile
echo "$workerId,$maxAssignmentsLimitPerBatch,$controllerBaseUrl" >> $inputDataFile
echo -e "\n\n"
fi

View File

@ -3,7 +3,9 @@ package eu.openaire.urls_worker;
import eu.openaire.publications_retriever.PublicationsRetriever;
import eu.openaire.publications_retriever.util.file.FileUtils;
import eu.openaire.urls_worker.plugins.PublicationsRetrieverPlugin;
import eu.openaire.urls_worker.util.AssignmentsHandler;
import eu.openaire.urls_worker.util.UriBuilder;
import eu.openaire.urls_worker.util.WorkerConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.boot.CommandLineRunner;
@ -34,6 +36,7 @@ public class UrlsWorkerApplication {
private static final String inputDataFilePath = FileUtils.workingDir + "inputData.txt";
public static String workerId = null;
public static int maxAssignmentsLimitPerBatch = 0;
public static String controllerBaseUrl = null; // BaseUrl template: "http://IP:PORT/api/"
@ -41,6 +44,7 @@ public class UrlsWorkerApplication {
setInputData(); // This may cause the Server to terminate early, in case the workerId or the controllerBaseUrl cannot be found.
new PublicationsRetrieverPlugin();
new AssignmentsHandler();
SpringApplication.run(UrlsWorkerApplication.class, args);
@ -109,26 +113,33 @@ public class UrlsWorkerApplication {
myReader = new Scanner(inputDataFile);
if ( myReader.hasNextLine() ) {
String[] data = myReader.nextLine().split(",");
if ( data.length < 2 ) {
if ( data.length < 3 ) {
String errorMsg = "Not all data were retrieved from file \"" + inputDataFilePath + "\"!";
logger.error(errorMsg);
System.err.println(errorMsg);
System.exit(61);
}
workerId = data[0].trim();
controllerBaseUrl = data[1].trim();
String maxAssignmentsLimitStr = data[1].trim();
try {
maxAssignmentsLimitPerBatch = Integer.parseInt(maxAssignmentsLimitStr);
} catch (NumberFormatException nfe) {
logger.warn("The given \"maxAssignmentsLimitPerBatch\" (" + maxAssignmentsLimitStr + ") was not a number! Will use the default one: " + WorkerConstants.ASSIGNMENTS_LIMIT);
maxAssignmentsLimitPerBatch = WorkerConstants.ASSIGNMENTS_LIMIT;
}
controllerBaseUrl = data[2].trim();
if ( !controllerBaseUrl.endsWith("/") )
controllerBaseUrl += "/"; // Make sure the whole urls will not break later.
controllerBaseUrl += "/"; // Make sure the other urls will not break later.
}
if ( (workerId == null) || (controllerBaseUrl == null) ) {
String errorMsg = "No \"workerId\" or/and \"controllerBaseUrl\" could be retrieved from the file: " + inputDataFilePath;
if ( (workerId == null) || (maxAssignmentsLimitPerBatch == 0) || (controllerBaseUrl == null) ) {
String errorMsg = "No \"workerId\" or/and \"maxAssignmentsLimitPerBatch\" or/and \"controllerBaseUrl\" could be retrieved from the file: " + inputDataFilePath;
logger.error(errorMsg);
System.err.println(errorMsg);
System.exit(62);
}
logger.info("workerId: " + workerId + ", controllerBaseUrl: " + controllerBaseUrl); // It's safe and helpful to show them in the logs.
logger.info("workerId: " + workerId + ", maxAssignmentsLimitPerBatch: " + maxAssignmentsLimitPerBatch + ", controllerBaseUrl: " + controllerBaseUrl); // It's safe and helpful to show them in the logs.
} catch (Exception e) {
String errorMsg = "An error prevented the retrieval of the workerId and the controllerBaseUrl from the file: " + inputDataFilePath + "\n" + e.getMessage();

View File

@ -50,7 +50,7 @@ public class FullTextsController {
return ResponseEntity.badRequest().body(errorMsg);
}
logger.info("Received a \"getMultipleFullTexts\" request for returning a zip-file containing " + fileNamesListNum + " full-texts, from assignments_" + assignmentsCounter + ", for batch_" + zipBatchCounter);
logger.info("Received a \"getMultipleFullTexts\" request for returning a zip-file containing " + fileNamesListNum + " full-texts, from assignments_" + assignmentsCounter + ", for batch_" + zipBatchCounter + " (out of " + totalZipBatches + ").");
String currentAssignmentsBaseFullTextsPath = assignmentsBaseDir + "assignments_" + assignmentsCounter + "_fullTexts" + File.separator;
@ -82,7 +82,7 @@ public class FullTextsController {
// If this is the last batch for this assignments-count, then make sure it is deleted in the next scheduled delete-operation.
if ( zipBatchCounter == totalZipBatches ) {
assignmentsNumsHandledAndLocallyDeleted.put(assignmentsCounter, false);
logger.debug("Will return the last batch of Assignments_" + assignmentsCounter + " to the Controller and these assignments will be deleted later.");
logger.debug("Will return the last batch (" + zipBatchCounter + ") of Assignments_" + assignmentsCounter + " to the Controller and these assignments will be deleted later.");
}
String contentType = request.getServletContext().getMimeType(zipFileFullPath);

View File

@ -40,7 +40,7 @@ public class GeneralController {
if ( AssignmentsHandler.isAvailableForWork ) {
logger.info("The worker is available for an assignment.");
return ResponseEntity.status(200).body(new WorkerResponse(UrlsWorkerApplication.workerId, WorkerConstants.ASSIGNMENTS_LIMIT));
return ResponseEntity.status(200).body(new WorkerResponse(UrlsWorkerApplication.workerId, UrlsWorkerApplication.maxAssignmentsLimitPerBatch));
} else {
logger.info("The worker is busy with another assignment.");
return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE).build();

View File

@ -9,13 +9,13 @@ import eu.openaire.publications_retriever.util.http.HttpConnUtils;
import eu.openaire.publications_retriever.util.url.DataToBeLogged;
import eu.openaire.publications_retriever.util.url.LoaderAndChecker;
import eu.openaire.publications_retriever.util.url.UrlUtils;
import eu.openaire.urls_worker.UrlsWorkerApplication;
import eu.openaire.urls_worker.models.Assignment;
import eu.openaire.urls_worker.models.Error;
import eu.openaire.urls_worker.models.Payload;
import eu.openaire.urls_worker.models.UrlReport;
import eu.openaire.urls_worker.services.FileStorageService;
import eu.openaire.urls_worker.util.AssignmentsHandler;
import eu.openaire.urls_worker.util.WorkerConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -42,7 +42,7 @@ public class PublicationsRetrieverPlugin {
FileUtils.shouldDownloadDocFiles = true;
FileUtils.docFileNameType = FileUtils.DocFileNameType.idName;
PublicationsRetriever.targetUrlType = "docUrl";
FileUtils.jsonBatchSize = WorkerConstants.ASSIGNMENTS_LIMIT;
FileUtils.jsonBatchSize = UrlsWorkerApplication.maxAssignmentsLimitPerBatch;
assignmentsBasePath = FileStorageService.assignmentsLocation.toString();
if ( !assignmentsBasePath.endsWith(File.separator) )

View File

@ -25,10 +25,10 @@ public class AssignmentsHandler {
private static final Logger logger = LoggerFactory.getLogger(AssignmentsHandler.class);
public static boolean isAvailableForWork = true;
public static final List<UrlReport> urlReports = new ArrayList<>(WorkerConstants.ASSIGNMENTS_LIMIT);
public static List<UrlReport> urlReports = null;
private static final int expectedDatasourcesPerRequest = 1400; // Per 10_000 assignments.
private static final int expectedAssignmentsPerDatasource = (WorkerConstants.ASSIGNMENTS_LIMIT / expectedDatasourcesPerRequest);
public static final Multimap<String, Assignment> assignmentsForPlugins = HashMultimap.create(expectedDatasourcesPerRequest, expectedAssignmentsPerDatasource);
private static int expectedAssignmentsPerDatasource = 0;
public static Multimap<String, Assignment> assignmentsForPlugins = null;
private static final boolean askForTest = false; // Enable this only for testing.
private static final Duration requestConnectTimeoutDuration = Duration.ofMinutes(1); // 1 minute.
@ -37,9 +37,17 @@ public class AssignmentsHandler {
public static final RestTemplate restTemplate = new RestTemplateBuilder().setConnectTimeout(requestConnectTimeoutDuration).setReadTimeout(requestReadTimeoutDuration).build();
public AssignmentsHandler()
{
urlReports = new ArrayList<>(UrlsWorkerApplication.maxAssignmentsLimitPerBatch);
expectedAssignmentsPerDatasource = (UrlsWorkerApplication.maxAssignmentsLimitPerBatch / expectedDatasourcesPerRequest);
assignmentsForPlugins = HashMultimap.create(expectedDatasourcesPerRequest, expectedAssignmentsPerDatasource);
}
public static AssignmentsRequest requestAssignments()
{
String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? "/test" : "") + "?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + WorkerConstants.ASSIGNMENTS_LIMIT;
String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? "/test" : "") + "?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + UrlsWorkerApplication.maxAssignmentsLimitPerBatch;
logger.info("Going to request assignments from the controller-server: " + requestUrl);
AssignmentsRequest assignmentRequest = null;

View File

@ -5,7 +5,11 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.env.Environment;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.net.URL;
public class UriBuilder {
@ -25,7 +29,10 @@ public class UriBuilder {
baseUrl += "://";
String hostName = InetAddress.getLoopbackAddress().getHostName(); // Non-null.
String hostName = getPublicIP();
if ( hostName == null )
hostName = InetAddress.getLoopbackAddress().getHostName(); // Non-null.
baseUrl += hostName;
String serverPort = environment.getProperty("server.port");
@ -36,11 +43,11 @@ public class UriBuilder {
baseUrl += ":" + serverPort;
String baseInternalPath = environment.getProperty("server.servlet.context-path");
if (baseInternalPath != null) {
if (!baseInternalPath.startsWith("/"))
if ( baseInternalPath != null ) {
if ( !baseInternalPath.startsWith("/") )
baseUrl += "/";
baseUrl += baseInternalPath;
if (!baseInternalPath.endsWith("/"))
if ( !baseInternalPath.endsWith("/") )
baseUrl += "/";
} else {
logger.warn("No property \"server.servlet.context-path\" was found in \"application.properties\"!"); // Yes it's expected.
@ -50,6 +57,25 @@ public class UriBuilder {
logger.debug("ServerBaseURL: " + baseUrl);
}
private static String getPublicIP()
{
String publicIpAddress = "";
URL url_name;
try {
url_name = new URL("https://api.ipify.org/");
} catch (MalformedURLException mue) {
logger.warn(mue.getMessage());
return null;
}
try ( BufferedReader bf = new BufferedReader(new InputStreamReader(url_name.openStream()))) {
publicIpAddress = bf.readLine().trim();
} catch (Exception e) {
logger.warn("Cannot get the publicIP address for this machine!", e);
return null;
}
return publicIpAddress;
}
public static String getBaseUrl() {
return baseUrl;
}