Allow handling of concurrent requests to the "getTestUrls"-endpoint.

springify_project
Lampros Smyrnaios 3 years ago
parent 308cab5ecd
commit c194af167f

@ -92,12 +92,12 @@ public class UrlController {
}
}// end loading-while-loop
if ( FileUtils.inputScanner != null )
FileUtils.inputScanner.close();
if ( FileUtils.inputScanner.get() != null ) // Check if the initial value is null.
FileUtils.inputScanner.get().close();
Assignment assignment = new Assignment((++assignmentId), tasks, workerId, new Date());
logger.info("Sending assignment_" + assignment.getAssignmentId() + " with " + tasks.size() + " tasks (" + FileUtils.duplicateIdUrlEntries + " more tasks were discarded as duplicates), to worker with ID: " + workerId);
logger.info("Sending assignment_" + assignment.getAssignmentId() + " with " + tasks.size() + " tasks (" + FileUtils.duplicateIdUrlEntries.get() + " more tasks were discarded as duplicates), to worker with ID: " + workerId);
return ResponseEntity.status(200).header("Content-Type", "application/json").body(new AssignmentResponse(assignment));
}

@ -18,14 +18,14 @@ public class FileUtils {
private static final Logger logger = LoggerFactory.getLogger(FileUtils.class);
public static Scanner inputScanner = null;
private static int fileIndex = 0;
private static int unretrievableInputLines = 0;
public static int duplicateIdUrlEntries = 0;
public static int jsonBatchSize = 3000;
public static ThreadLocal<Scanner> inputScanner = new ThreadLocal<Scanner>(); // Every Thread has its own variable.
private static ThreadLocal<Integer> fileIndex = new ThreadLocal<Integer>();
private static ThreadLocal<Integer> unretrievableInputLines = new ThreadLocal<Integer>();
public static ThreadLocal<Integer> duplicateIdUrlEntries = new ThreadLocal<Integer>();
public static final int jsonBatchSize = 3000;
private static final String utf8Charset = "UTF-8";
public static String inputFileFullPath;
private static String workingDir = System.getProperty("user.dir") + File.separator;
private static final String workingDir = System.getProperty("user.dir") + File.separator;
public FileUtils() throws RuntimeException
@ -39,8 +39,10 @@ public class FileUtils {
logger.debug("Going to retrieve the data from the inputResourceFile: " + resourceFileName);
FileUtils.inputScanner = new Scanner(inputStream, utf8Charset);
fileIndex = 0; // Re-initialize the file-number-pointer.
FileUtils.inputScanner.set(new Scanner(inputStream, utf8Charset));
fileIndex.set(0); // Re-initialize the file-number-pointer.
unretrievableInputLines.set(0);
duplicateIdUrlEntries.set(0);
}
@ -100,32 +102,32 @@ public class FileUtils {
HashMultimap<String, String> idAndUrlMappedInput = HashMultimap.create(expectedIDsPerBatch, expectedPathsPerID);
int curBeginning = fileIndex;
int curBeginning = fileIndex.get();
while ( inputScanner.hasNextLine() && (fileIndex < (curBeginning + jsonBatchSize)) )
while ( inputScanner.get().hasNextLine() && (fileIndex.get() < (curBeginning + jsonBatchSize)) )
{// While (!EOF) and inside the current url-batch, iterate through lines.
//logger.debug("fileIndex: " + FileUtils.fileIndex); // DEBUG!
//logger.debug("fileIndex: " + FileUtils.fileIndex.get()); // DEBUG!
// Take each line, remove potential double quotes.
String retrievedLineStr = inputScanner.nextLine();
String retrievedLineStr = inputScanner.get().nextLine();
//logger.debug("Loaded from inputFile: " + retrievedLineStr); // DEBUG!
fileIndex ++;
fileIndex.set(fileIndex.get() +1);
if ( retrievedLineStr.isEmpty() ) {
unretrievableInputLines ++;
unretrievableInputLines.set(unretrievableInputLines.get() +1);
continue;
}
if ( (inputIdUrlTuple = jsonDecoder(retrievedLineStr)) == null ) { // Decode the jsonLine and take the two attributes.
logger.warn("A problematic inputLine found: \t" + retrievedLineStr);
unretrievableInputLines ++;
unretrievableInputLines.set(unretrievableInputLines.get() +1);
continue;
}
if ( !idAndUrlMappedInput.put(inputIdUrlTuple.getId(), inputIdUrlTuple.getUrl()) ) { // We have a duplicate url in the input.. log it here as we cannot pass it through the HashMultimap. It's possible that this as well as the original might be/give a docUrl.
duplicateIdUrlEntries ++;
duplicateIdUrlEntries.set(duplicateIdUrlEntries.get() +1);
}
}
@ -139,7 +141,7 @@ public class FileUtils {
*/
// Returns the number of input lines read so far (fileIndex) minus the lines counted as unretrievable
// (empty lines and lines the JSON decoder rejected).
// NOTE(review): the two return statements below compute the same difference; the first reads the counters
// as plain ints, the second through ThreadLocal.get(). This looks like the before/after pair of a diff,
// confirm which one the real file keeps.
// NOTE(review): the ThreadLocal<Integer> counters are declared without an initial value, so get() yields
// null on a thread that never called set(), and unboxing it in the subtraction would throw a
// NullPointerException. Confirm every calling thread initialises the counters first.
public static int getCurrentlyLoadedUrls() // In the end, it gives the total number of urls we have processed.
{
return FileUtils.fileIndex - FileUtils.unretrievableInputLines;
return FileUtils.fileIndex.get() - FileUtils.unretrievableInputLines.get();
}

Loading…
Cancel
Save