- Add the "getTestUrls"-endpoint which returns an "Assignment" with data retrieved from the added resource-file.

- Update the "getUrls"-endpoint to be ready to retrieve data from the database, once it's added.
- Update the dependencies.
- Code cleanup.
This commit is contained in:
Lampros Smyrnaios 2021-05-18 17:23:20 +03:00
parent d3588ea36b
commit e2cc320baf
7 changed files with 1232 additions and 156 deletions

View File

@ -1,11 +1,11 @@
buildscript { buildscript {
ext { ext {
springSecurityVersion = "5.4.5" springSecurityVersion = "5.4.6"
} }
} }
plugins { plugins {
id 'org.springframework.boot' version '2.4.3' id 'org.springframework.boot' version '2.4.5'
id 'java' id 'java'
} }
@ -31,8 +31,10 @@ dependencies {
implementation("org.springframework.security:spring-security-config:${springSecurityVersion}") implementation("org.springframework.security:spring-security-config:${springSecurityVersion}")
implementation("io.jsonwebtoken:jjwt:0.9.1") implementation("io.jsonwebtoken:jjwt:0.9.1")
// https://mvnrepository.com/artifact/com.google.guava/guava
implementation group: 'com.google.guava', name: 'guava', version: '30.1.1-jre'
implementation "org.projectlombok:lombok:1.18.18" implementation "org.projectlombok:lombok:1.18.20"
implementation 'com.google.code.gson:gson:2.8.6' implementation 'com.google.code.gson:gson:2.8.6'
implementation group: 'javax.validation', name: 'validation-api', version: '2.0.1.Final' implementation group: 'javax.validation', name: 'validation-api', version: '2.0.1.Final'
implementation group: 'ch.qos.logback', name: 'logback-classic', version: '1.2.3' implementation group: 'ch.qos.logback', name: 'logback-classic', version: '1.2.3'

View File

@ -1,15 +1,19 @@
package eu.openaire.urls_controller.controllers; package eu.openaire.urls_controller.controllers;
import eu.openaire.urls_controller.models.UrlToCheck; import com.google.common.collect.HashMultimap;
import eu.openaire.urls_controller.payloads.requests.UrlsRequest; import eu.openaire.urls_controller.models.Assignment;
import eu.openaire.urls_controller.payloads.responces.UrlsResponse; import eu.openaire.urls_controller.models.Task;
import eu.openaire.urls_controller.payloads.requests.WorkerRequest;
import eu.openaire.urls_controller.payloads.responces.AssignmentResponse;
import eu.openaire.urls_controller.util.FileUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.http.ResponseEntity; import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*; import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.util.ArrayList; import java.util.*;
import java.util.List;
@RestController @RestController
@RequestMapping("/urls") @RequestMapping("/urls")
@ -24,39 +28,68 @@ public class UrlController {
@GetMapping("") @GetMapping("")
public ResponseEntity<?> getUrls() { public ResponseEntity<?> getUrls(WorkerRequest workerRequest) {
List<UrlToCheck> urlsToCheck = new ArrayList<>(); String workerId = workerRequest.getWorkerId();
int tasksLimit = workerRequest.getTasksLimit();
List<Task> tasks = new ArrayList<>();
// TODO - Retrieve the ID-URL pairs from the database. // TODO - Create the Assignment from the id-urls stored in the database up to the tasks-limit.
// For now, we give static data. Assignment assignment = new Assignment(tasks, workerId, new Date());
urlsToCheck.add(new UrlToCheck("50|dedup_wf_001::173a4e29249d4a67e72148ff50a88968", "http://dx.doi.org/10.1590/s0034-76122012000200012"));
urlsToCheck.add(new UrlToCheck("50|dedup_wf_001::220cb83d8a6e7a90ac4ac72feb030700", "https://doaj.org/toc/2214-9147"));
urlsToCheck.add(new UrlToCheck("50|dedup_wf_001::2b29092d3ed25d7ccfac894974e0915e", "http://www.plantintroduction.org/index.php/pi/article/view/1201"));
urlsToCheck.add(new UrlToCheck("50|dedup_wf_001::2fed1ea2149efd0d5a7837240a4aaf71", "http://europepmc.org/articles/pmc2882784?pdf=render"));
urlsToCheck.add(new UrlToCheck("50|dedup_wf_001::180e60bbb541a9b9a9313779887da9cf", "http://dx.doi.org/10.1051/e3sconf/202016405016"));
logger.debug("ID-URL pairs to return:\n" + urlsToCheck); // TODO - Write the Assignment details to the database and then send it to the worker.
return ResponseEntity.status(200).header("Content-Type", "application/json").body(new UrlsResponse(urlsToCheck).toString()); return ResponseEntity.status(200).header("Content-Type", "application/json").body(new AssignmentResponse(assignment));
} }
@GetMapping("test")
public ResponseEntity<?> getTestUrls(WorkerRequest workerRequest) {
@PostMapping("") String workerId = workerRequest.getWorkerId();
// TODO - Add authorization to edit the database. int tasksLimit = workerRequest.getTasksLimit();
public ResponseEntity<?> addResults(@RequestBody UrlsRequest urlsRequest) {
logger.debug(urlsRequest.toString()); List<Task> tasks = new ArrayList<>();
new FileUtils(); // Find the input file.
HashMultimap<String, String> loadedIdUrlPairs;
boolean isFirstRun = true;
boolean tasksLimitReached = false;
// TODO - Add data in the DataBase.. // Start loading urls.
while ( true )
{
loadedIdUrlPairs = FileUtils.getNextIdUrlPairBatchFromJson(); // Take urls from jsonFile.
if ( FileUtils.isFinishedLoading(loadedIdUrlPairs.isEmpty(), isFirstRun) ) // Throws RuntimeException which is automatically passed on.
break;
else
isFirstRun = false;
return ResponseEntity.ok().build(); Set<Map.Entry<String, String>> pairs = loadedIdUrlPairs.entries();
for ( Map.Entry<String,String> pair : pairs )
{
if ( tasks.size() > tasksLimit ) {
tasksLimitReached = true;
break;
} }
tasks.add(new Task(pair.getKey(), pair.getValue()));
}// end pairs-for-loop
if ( tasksLimitReached ) {
logger.debug("Done loading tasksLimit (" + tasksLimit + ") urls from the inputFile.");
break;
}
}// end loading-while-loop
Assignment assignment = new Assignment(tasks, workerId, new Date());
return ResponseEntity.status(200).header("Content-Type", "application/json").body(new AssignmentResponse(assignment));
}
} }

View File

@ -1,50 +0,0 @@
package eu.openaire.urls_controller.models;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
/**
 * Plain id-url pair that is handed out for checking.
 * This model is not meant to be matched with a database entity.
 * Null-valued properties are left out of the serialized JSON and "id" is written before "url".
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
@JsonPropertyOrder({
        "id",
        "url"
})
public class UrlToCheck {

    @JsonProperty("id")
    private String id;

    @JsonProperty("url")
    private String url;

    /** Creates an instance with both properties unset (null). */
    public UrlToCheck() {
    }

    /**
     * @param id  the identifier the url belongs to
     * @param url the url to be checked
     */
    public UrlToCheck(String id, String url) {
        this.id = id;
        this.url = url;
    }

    /** @return the identifier, possibly null */
    public String getId() {
        return this.id;
    }

    /** @return the url, possibly null */
    public String getUrl() {
        return this.url;
    }

    /** @param id the identifier to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @param url the url to store */
    public void setUrl(String url) {
        this.url = url;
    }

    /**
     * Renders this pair as a single JSON-like line, terminated by a newline.
     * The values are inserted as-is (no escaping) and a null value is rendered as the text "null".
     */
    @Override
    public String toString() {
        StringBuilder line = new StringBuilder();
        line.append("{\"id\":\"").append(id);
        line.append("\",\"url\":\"").append(url);
        line.append("\"}\n");
        return line.toString();
    }
}

View File

@ -1,41 +0,0 @@
package eu.openaire.urls_controller.payloads.requests;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import eu.openaire.urls_controller.models.Payload;
import java.util.List;
/**
 * Request payload that carries a list of {@link Payload} objects under the JSON property "payloads".
 * A null list is left out of the serialized JSON.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public class UrlsRequest {

    @JsonProperty("payloads")
    private List<Payload> payloads;

    /** Creates a request without payloads (the list stays null until it is set). */
    public UrlsRequest() {
    }

    /**
     * Creates a request holding the given payloads.
     * @param payloads the payloads to carry; stored as-is (no copy is made)
     */
    public UrlsRequest(List<Payload> payloads) {
        this.payloads = payloads;
    }

    /**
     * Does nothing. This was a copy-pasted "constructor" which, having a return type and another class' name,
     * was compiled as an ordinary method. It is kept only so that existing callers keep compiling.
     * @deprecated use {@link #UrlsRequest()} instead.
     */
    @Deprecated
    public void UrlsResponse() {
    }

    /**
     * Stores the given payloads. This was a copy-pasted "constructor" which, having a return type and
     * another class' name, was compiled as an ordinary method. It is kept only for backward compatibility.
     * @param payloads the payloads to carry
     * @deprecated use {@link #UrlsRequest(List)} or {@link #setUrlsToCheck(List)} instead.
     */
    @Deprecated
    public void UrlsResponse(List<Payload> payloads) {
        this.payloads = payloads;
    }

    /** @return this very instance */
    public UrlsRequest get() {
        return this;
    }

    /**
     * NOTE(review): the accessor is named after "urlsToCheck" although it exposes the "payloads" field;
     * the name is kept as-is, since callers and the bean-property name may depend on it.
     * @return the stored payloads, possibly null
     */
    public List<Payload> getUrlsToCheck() {
        return payloads;
    }

    /** @param payloads the payloads to store; replaces any previously stored list */
    public void setUrlsToCheck(List<Payload> payloads) {
        this.payloads = payloads;
    }

    @Override
    public String toString() {
        return "UrlsRequest{" +
                "payloads=" + payloads +
                '}';
    }
}

View File

@ -1,38 +0,0 @@
package eu.openaire.urls_controller.payloads.responces;
import eu.openaire.urls_controller.models.UrlToCheck;
import java.util.List;
/**
 * Response payload that wraps the list of {@link UrlToCheck} entries to be returned.
 */
public class UrlsResponse {

    private List<UrlToCheck> urlsToCheck;

    /** Creates a response without entries (the list stays null until it is set). */
    public UrlsResponse() {
    }

    /** @param urlsToCheck the entries to return; stored as-is (no copy is made) */
    public UrlsResponse(List<UrlToCheck> urlsToCheck) {
        this.urlsToCheck = urlsToCheck;
    }

    /** @return this very instance */
    public UrlsResponse get() {
        return this;
    }

    /** @return the stored entries, possibly null */
    public List<UrlToCheck> getUrlsToCheck() {
        return urlsToCheck;
    }

    /** @param urlToCheck the entries to store; replaces any previously stored list */
    public void setUrlsToCheck(List<UrlToCheck> urlToCheck) {
        this.urlsToCheck = urlToCheck;
    }

    /**
     * Concatenates the string-form of every entry, in list order.
     * @return the concatenated entries, or an empty string when no list has been set
     */
    @Override
    public String toString() {
        if ( urlsToCheck == null )  // The no-arg constructor leaves the list unset; "toString()" must not throw then.
            return "";

        // The default capacity is used, since the number of entries says nothing about the number of characters.
        StringBuilder sb = new StringBuilder();
        for ( UrlToCheck urlToCheck : urlsToCheck ) {
            sb.append(urlToCheck);
        }
        return sb.toString();
    }
}

View File

@ -0,0 +1,170 @@
package eu.openaire.urls_controller.util;
import com.google.common.collect.HashMultimap;
import eu.openaire.urls_controller.models.Task;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.boot.configurationprocessor.json.JSONException;
import org.springframework.boot.configurationprocessor.json.JSONObject;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Scanner;
/**
 * Loads id-url pairs, in batches, from the json-lines test-input resource-file.
 * Creating an instance (re)opens the resource-file; the loading itself is done through the static methods.
 * All the state is static and there is no synchronization, so this class is not thread-safe.
 */
public class FileUtils {

    private static final Logger logger = LoggerFactory.getLogger(FileUtils.class);

    private static Scanner inputScanner = null;
    private static int fileIndex = 0;   // The number of lines read so far from the currently opened input.
    private static int unretrievableInputLines = 0;    // Empty or un-decodable lines.
    private static int duplicateIdUrlEntries = 0;
    public static int jsonBatchSize = 3000;    // The max number of lines to read per batch.

    private static final String utf8Charset = "UTF-8";

    public static String inputFileFullPath;
    private static String userDir = System.getProperty("user.dir") + File.separator;


    /**
     * Opens the test-input resource-file and resets the reading-state.
     * If the resource-file is not found, an error is logged and the previous reading-state (if any) is left untouched.
     */
    public FileUtils()
    {
        String resourceDirName = "testInputFiles";
        String inputFileName = "orderedList1000.json";

        // The file-system path uses the platform's separator.
        inputFileFullPath = userDir + "src" + File.separator + "main" + File.separator + "resources"
                + File.separator + resourceDirName + File.separator + inputFileName;

        // The class-loader resource-names are always '/'-separated, no matter the platform.
        String resourceFileName = resourceDirName + "/" + inputFileName;

        InputStream inputStream = getClass().getClassLoader().getResourceAsStream(resourceFileName);
        if ( inputStream == null ) {
            logger.error("No resourceFile was found with name \"" + resourceFileName + "\".");
            return;
        }

        logger.debug("Going to retrieve the data from the inputResourceFile: " + resourceFileName);

        // Swap in the new scanner first and then release the previous one, so that its stream is not leaked.
        Scanner previousScanner = FileUtils.inputScanner;
        FileUtils.inputScanner = new Scanner(inputStream, utf8Charset);
        if ( previousScanner != null )
            previousScanner.close();

        fileIndex = 0; // Re-initialize the file-number-pointer.
        // Re-initialize the counters as well, otherwise "getCurrentlyLoadedUrls()" would mix numbers from different runs.
        unretrievableInputLines = 0;
        duplicateIdUrlEntries = 0;
    }


    /**
     * Counts the lines of the file located at "inputFileFullPath".
     * This is currently not used, but it may be useful in a future scenario.
     * @return the number of lines, or -1 if the file could not be opened
     */
    private static long getInputFileLinesNum()
    {
        long numOfLines = 0;
        // The lines-stream keeps the file open, so it has to be closed explicitly.
        try ( java.util.stream.Stream<String> lines = Files.lines(Paths.get(inputFileFullPath)) ) {
            numOfLines = lines.count();
            logger.debug("The numOfLines in the inputFile is " + numOfLines);
        } catch (IOException e) {
            logger.error("Could not retrieve the numOfLines. " + e);
            return -1;
        }
        return numOfLines;
    }


    /**
     * This method decodes a JSON String into a Task, using its "id" and "url" members.
     * @param jsonLine String
     * @return the Task, or null if the line could not be parsed, a member is missing, or the url is empty
     */
    public static Task jsonDecoder(String jsonLine)
    {
        // Get the ID and the url.
        String idStr = null;
        String urlStr = null;
        try {
            JSONObject jObj = new JSONObject(jsonLine); // Construct a JSONObject from the retrieved jsonLine.
            idStr = jObj.get("id").toString();
            urlStr = jObj.get("url").toString();
        } catch (JSONException je) {
            logger.warn("JSONException caught when tried to parse and extract values from jsonLine: \t" + jsonLine, je);
            return null;
        }

        if ( urlStr.isEmpty() ) {
            if ( !idStr.isEmpty() ) // If we only have the id, then go and log it.
                logger.warn("The url was not found for id: \"" + idStr + "\"");
            return null;
        }
        return new Task(idStr, urlStr);
    }


    /**
     * This method reads the next batch (up to "jsonBatchSize" lines) of the json-input and extracts the urls, along with the IDs.
     * Empty and un-decodable lines are counted and skipped; duplicate id-url pairs are counted and stored only once.
     * @return HashMultimap<String, String> mapping each id to its urls; it is empty when no more data is available or no input was opened
     */
    public static HashMultimap<String, String> getNextIdUrlPairBatchFromJson()
    {
        Task inputIdUrlTuple;
        int expectedPathsPerID = 5;
        int expectedIDsPerBatch = jsonBatchSize / expectedPathsPerID;

        HashMultimap<String, String> idAndUrlMappedInput = HashMultimap.create(expectedIDsPerBatch, expectedPathsPerID);

        if ( inputScanner == null ) {  // The resource-file was never (successfully) opened.
            logger.error("The inputFile has not been opened, so no id-url pairs can be loaded!");
            return idAndUrlMappedInput;    // An empty result is reported as "no urls retrieved" by "isFinishedLoading()".
        }

        int curBeginning = fileIndex;

        while ( inputScanner.hasNextLine() && (fileIndex < (curBeginning + jsonBatchSize)) )
        {// While (!EOF) and inside the current url-batch, iterate through lines.

            //logger.debug("fileIndex: " + FileUtils.fileIndex); // DEBUG!

            // Take the next line, as it is.
            String retrievedLineStr = inputScanner.nextLine();
            //logger.debug("Loaded from inputFile: " + retrievedLineStr); // DEBUG!

            fileIndex ++;

            if ( retrievedLineStr.isEmpty() ) {
                unretrievableInputLines ++;
                continue;
            }

            if ( (inputIdUrlTuple = jsonDecoder(retrievedLineStr)) == null ) { // Decode the jsonLine and take the two attributes.
                logger.warn("A problematic inputLine found: \t" + retrievedLineStr);
                unretrievableInputLines ++;
                continue;
            }

            if ( !idAndUrlMappedInput.put(inputIdUrlTuple.getId(), inputIdUrlTuple.getUrl()) ) { // We have a duplicate url in the input.. log it here as we cannot pass it through the HashMultimap. It's possible that this as well as the original might be/give a docUrl.
                duplicateIdUrlEntries ++;
            }
        }

        return idAndUrlMappedInput;
    }


    /**
     * This method returns the number of lines read from the inputFile, excluding the empty and the un-decodable ones.
     * @return loadedUrls
     */
    public static int getCurrentlyLoadedUrls() // In the end, it gives the total number of urls we have processed.
    {
        return FileUtils.fileIndex - FileUtils.unretrievableInputLines;
    }


    /**
     * This method checks if there is no more input-data and returns true in that case.
     * Otherwise, it returns false, if there is more input-data to be loaded.
     * No exception is thrown: if the very first batch is empty, an error is logged instead.
     * @param isEmptyOfData whether the last loaded batch was empty
     * @param isFirstRun whether the last loaded batch was the first one
     * @return finished loading / not finished
     */
    public static boolean isFinishedLoading(boolean isEmptyOfData, boolean isFirstRun)
    {
        if ( isEmptyOfData ) {
            if ( isFirstRun )
                logger.error("Could not retrieve any urls from the inputFile!");
            else
                logger.debug("Done loading " + FileUtils.getCurrentlyLoadedUrls() + " urls from the inputFile.");
            return true;
        }
        return false;
    }
}

File diff suppressed because it is too large Load Diff