diff --git a/src/main/java/eu/openaire/urls_controller/controllers/UrlController.java b/src/main/java/eu/openaire/urls_controller/controllers/UrlController.java index 2295c25..f320cd1 100644 --- a/src/main/java/eu/openaire/urls_controller/controllers/UrlController.java +++ b/src/main/java/eu/openaire/urls_controller/controllers/UrlController.java @@ -92,12 +92,12 @@ public class UrlController { } }// end loading-while-loop - if ( FileUtils.inputScanner != null ) - FileUtils.inputScanner.close(); + if ( FileUtils.inputScanner.get() != null ) // Check if the initial value is null. + FileUtils.inputScanner.get().close(); Assignment assignment = new Assignment((++assignmentId), tasks, workerId, new Date()); - logger.info("Sending assignment_" + assignment.getAssignmentId() + " with " + tasks.size() + " tasks (" + FileUtils.duplicateIdUrlEntries + " more tasks were discarded as duplicates), to worker with ID: " + workerId); + logger.info("Sending assignment_" + assignment.getAssignmentId() + " with " + tasks.size() + " tasks (" + FileUtils.duplicateIdUrlEntries.get() + " more tasks were discarded as duplicates), to worker with ID: " + workerId); return ResponseEntity.status(200).header("Content-Type", "application/json").body(new AssignmentResponse(assignment)); } diff --git a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java index 13570d7..5715f5f 100644 --- a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java +++ b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java @@ -18,14 +18,14 @@ public class FileUtils { private static final Logger logger = LoggerFactory.getLogger(FileUtils.class); - public static Scanner inputScanner = null; - private static int fileIndex = 0; - private static int unretrievableInputLines = 0; - public static int duplicateIdUrlEntries = 0; - public static int jsonBatchSize = 3000; + public static ThreadLocal inputScanner = new ThreadLocal(); // Every Thread has its own variable. + private static ThreadLocal fileIndex = new ThreadLocal(); + private static ThreadLocal unretrievableInputLines = new ThreadLocal(); + public static ThreadLocal duplicateIdUrlEntries = new ThreadLocal(); + public static final int jsonBatchSize = 3000; private static final String utf8Charset = "UTF-8"; public static String inputFileFullPath; - private static String workingDir = System.getProperty("user.dir") + File.separator; + private static final String workingDir = System.getProperty("user.dir") + File.separator; public FileUtils() throws RuntimeException @@ -39,8 +39,10 @@ public class FileUtils { logger.debug("Going to retrieve the data from the inputResourceFile: " + resourceFileName); - FileUtils.inputScanner = new Scanner(inputStream, utf8Charset); - fileIndex = 0; // Re-initialize the file-number-pointer. + FileUtils.inputScanner.set(new Scanner(inputStream, utf8Charset)); + fileIndex.set(0); // Re-initialize the file-number-pointer. + unretrievableInputLines.set(0); + duplicateIdUrlEntries.set(0); } @@ -100,32 +102,32 @@ public class FileUtils { HashMultimap idAndUrlMappedInput = HashMultimap.create(expectedIDsPerBatch, expectedPathsPerID); - int curBeginning = fileIndex; + int curBeginning = fileIndex.get(); - while ( inputScanner.hasNextLine() && (fileIndex < (curBeginning + jsonBatchSize)) ) + while ( inputScanner.get().hasNextLine() && (fileIndex.get() < (curBeginning + jsonBatchSize)) ) {// While (!EOF) and inside the current url-batch, iterate through lines. - //logger.debug("fileIndex: " + FileUtils.fileIndex); // DEBUG! + //logger.debug("fileIndex: " + FileUtils.fileIndex.get()); // DEBUG! // Take each line, remove potential double quotes. - String retrievedLineStr = inputScanner.nextLine(); + String retrievedLineStr = inputScanner.get().nextLine(); //logger.debug("Loaded from inputFile: " + retrievedLineStr); // DEBUG! - fileIndex ++; + fileIndex.set(fileIndex.get() +1); if ( retrievedLineStr.isEmpty() ) { - unretrievableInputLines ++; + unretrievableInputLines.set(unretrievableInputLines.get() +1); continue; } if ( (inputIdUrlTuple = jsonDecoder(retrievedLineStr)) == null ) { // Decode the jsonLine and take the two attributes. logger.warn("A problematic inputLine found: \t" + retrievedLineStr); - unretrievableInputLines ++; + unretrievableInputLines.set(unretrievableInputLines.get() +1); continue; } if ( !idAndUrlMappedInput.put(inputIdUrlTuple.getId(), inputIdUrlTuple.getUrl()) ) { // We have a duplicate url in the input.. log it here as we cannot pass it through the HashMultimap. It's possible that this as well as the original might be/give a docUrl. - duplicateIdUrlEntries ++; + duplicateIdUrlEntries.set(duplicateIdUrlEntries.get() +1); } } @@ -139,7 +141,7 @@ public class FileUtils { */ public static int getCurrentlyLoadedUrls() // In the end, it gives the total number of urls we have processed. { - return FileUtils.fileIndex - FileUtils.unretrievableInputLines; + return FileUtils.fileIndex.get() - FileUtils.unretrievableInputLines.get(); }