forked from lsmyrnaios/UrlsController
- Add the "getTestUrls"-endpoint which returns an "Assignment" with data retrieved from the added resource-file.
- Update the "getUrls"-endpoint so that it is ready to retrieve data from the database, once the database is added. - Update the dependencies. - Code cleanup.
This commit is contained in:
parent
d3588ea36b
commit
e2cc320baf
|
@ -1,11 +1,11 @@
|
||||||
buildscript {
|
buildscript {
|
||||||
ext {
|
ext {
|
||||||
springSecurityVersion = "5.4.5"
|
springSecurityVersion = "5.4.6"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
plugins {
|
plugins {
|
||||||
id 'org.springframework.boot' version '2.4.3'
|
id 'org.springframework.boot' version '2.4.5'
|
||||||
id 'java'
|
id 'java'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -31,8 +31,10 @@ dependencies {
|
||||||
implementation("org.springframework.security:spring-security-config:${springSecurityVersion}")
|
implementation("org.springframework.security:spring-security-config:${springSecurityVersion}")
|
||||||
implementation("io.jsonwebtoken:jjwt:0.9.1")
|
implementation("io.jsonwebtoken:jjwt:0.9.1")
|
||||||
|
|
||||||
|
// https://mvnrepository.com/artifact/com.google.guava/guava
|
||||||
|
implementation group: 'com.google.guava', name: 'guava', version: '30.1.1-jre'
|
||||||
|
|
||||||
implementation "org.projectlombok:lombok:1.18.18"
|
implementation "org.projectlombok:lombok:1.18.20"
|
||||||
implementation 'com.google.code.gson:gson:2.8.6'
|
implementation 'com.google.code.gson:gson:2.8.6'
|
||||||
implementation group: 'javax.validation', name: 'validation-api', version: '2.0.1.Final'
|
implementation group: 'javax.validation', name: 'validation-api', version: '2.0.1.Final'
|
||||||
implementation group: 'ch.qos.logback', name: 'logback-classic', version: '1.2.3'
|
implementation group: 'ch.qos.logback', name: 'logback-classic', version: '1.2.3'
|
||||||
|
|
|
@ -1,15 +1,19 @@
|
||||||
package eu.openaire.urls_controller.controllers;
|
package eu.openaire.urls_controller.controllers;
|
||||||
|
|
||||||
import eu.openaire.urls_controller.models.UrlToCheck;
|
import com.google.common.collect.HashMultimap;
|
||||||
import eu.openaire.urls_controller.payloads.requests.UrlsRequest;
|
import eu.openaire.urls_controller.models.Assignment;
|
||||||
import eu.openaire.urls_controller.payloads.responces.UrlsResponse;
|
import eu.openaire.urls_controller.models.Task;
|
||||||
|
import eu.openaire.urls_controller.payloads.requests.WorkerRequest;
|
||||||
|
import eu.openaire.urls_controller.payloads.responces.AssignmentResponse;
|
||||||
|
import eu.openaire.urls_controller.util.FileUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.http.ResponseEntity;
|
import org.springframework.http.ResponseEntity;
|
||||||
import org.springframework.web.bind.annotation.*;
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
@RestController
|
@RestController
|
||||||
@RequestMapping("/urls")
|
@RequestMapping("/urls")
|
||||||
|
@ -24,39 +28,68 @@ public class UrlController {
|
||||||
|
|
||||||
|
|
||||||
@GetMapping("")
|
@GetMapping("")
|
||||||
public ResponseEntity<?> getUrls() {
|
public ResponseEntity<?> getUrls(WorkerRequest workerRequest) {
|
||||||
|
|
||||||
List<UrlToCheck> urlsToCheck = new ArrayList<>();
|
String workerId = workerRequest.getWorkerId();
|
||||||
|
int tasksLimit = workerRequest.getTasksLimit();
|
||||||
|
|
||||||
|
List<Task> tasks = new ArrayList<>();
|
||||||
|
|
||||||
// TODO - Retrieve the ID-URL pairs from the database.
|
// TODO - Create the Assignment from the id-urls stored in the database up to the tasks-limit.
|
||||||
|
|
||||||
// For now, we give static data.
|
Assignment assignment = new Assignment(tasks, workerId, new Date());
|
||||||
urlsToCheck.add(new UrlToCheck("50|dedup_wf_001::173a4e29249d4a67e72148ff50a88968", "http://dx.doi.org/10.1590/s0034-76122012000200012"));
|
|
||||||
urlsToCheck.add(new UrlToCheck("50|dedup_wf_001::220cb83d8a6e7a90ac4ac72feb030700", "https://doaj.org/toc/2214-9147"));
|
|
||||||
urlsToCheck.add(new UrlToCheck("50|dedup_wf_001::2b29092d3ed25d7ccfac894974e0915e", "http://www.plantintroduction.org/index.php/pi/article/view/1201"));
|
|
||||||
urlsToCheck.add(new UrlToCheck("50|dedup_wf_001::2fed1ea2149efd0d5a7837240a4aaf71", "http://europepmc.org/articles/pmc2882784?pdf=render"));
|
|
||||||
urlsToCheck.add(new UrlToCheck("50|dedup_wf_001::180e60bbb541a9b9a9313779887da9cf", "http://dx.doi.org/10.1051/e3sconf/202016405016"));
|
|
||||||
|
|
||||||
logger.debug("ID-URL pairs to return:\n" + urlsToCheck);
|
// TODO - Write the Assignment details to the database and then send it to the worker.
|
||||||
|
|
||||||
return ResponseEntity.status(200).header("Content-Type", "application/json").body(new UrlsResponse(urlsToCheck).toString());
|
return ResponseEntity.status(200).header("Content-Type", "application/json").body(new AssignmentResponse(assignment));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@GetMapping("test")
|
||||||
|
public ResponseEntity<?> getTestUrls(WorkerRequest workerRequest) {
|
||||||
|
|
||||||
@PostMapping("")
|
String workerId = workerRequest.getWorkerId();
|
||||||
// TODO - Add authorization to edit the database.
|
int tasksLimit = workerRequest.getTasksLimit();
|
||||||
public ResponseEntity<?> addResults(@RequestBody UrlsRequest urlsRequest) {
|
|
||||||
|
|
||||||
logger.debug(urlsRequest.toString());
|
List<Task> tasks = new ArrayList<>();
|
||||||
|
new FileUtils(); // Find the input file.
|
||||||
|
|
||||||
|
HashMultimap<String, String> loadedIdUrlPairs;
|
||||||
|
boolean isFirstRun = true;
|
||||||
|
boolean tasksLimitReached = false;
|
||||||
|
|
||||||
// TODO - Add data in the DataBase..
|
// Start loading urls.
|
||||||
|
while ( true )
|
||||||
|
{
|
||||||
|
loadedIdUrlPairs = FileUtils.getNextIdUrlPairBatchFromJson(); // Take urls from jsonFile.
|
||||||
|
|
||||||
|
if ( FileUtils.isFinishedLoading(loadedIdUrlPairs.isEmpty(), isFirstRun) ) // Throws RuntimeException which is automatically passed on.
|
||||||
|
break;
|
||||||
|
else
|
||||||
|
isFirstRun = false;
|
||||||
|
|
||||||
return ResponseEntity.ok().build();
|
Set<Map.Entry<String, String>> pairs = loadedIdUrlPairs.entries();
|
||||||
|
|
||||||
|
for ( Map.Entry<String,String> pair : pairs )
|
||||||
|
{
|
||||||
|
if ( tasks.size() > tasksLimit ) {
|
||||||
|
tasksLimitReached = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tasks.add(new Task(pair.getKey(), pair.getValue()));
|
||||||
|
}// end pairs-for-loop
|
||||||
|
|
||||||
|
if ( tasksLimitReached ) {
|
||||||
|
logger.debug("Done loading tasksLimit (" + tasksLimit + ") urls from the inputFile.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
}// end loading-while-loop
|
||||||
|
|
||||||
|
Assignment assignment = new Assignment(tasks, workerId, new Date());
|
||||||
|
|
||||||
|
return ResponseEntity.status(200).header("Content-Type", "application/json").body(new AssignmentResponse(assignment));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,50 +0,0 @@
|
||||||
package eu.openaire.urls_controller.models;
|
|
||||||
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
|
||||||
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
|
|
||||||
|
|
||||||
|
|
||||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
|
||||||
@JsonPropertyOrder({
|
|
||||||
"id",
|
|
||||||
"url"
|
|
||||||
})
|
|
||||||
public class UrlToCheck { // This model will not match with a database,
|
|
||||||
|
|
||||||
@JsonProperty("id")
|
|
||||||
private String id;
|
|
||||||
|
|
||||||
@JsonProperty("url")
|
|
||||||
private String url;
|
|
||||||
|
|
||||||
public UrlToCheck() {
|
|
||||||
}
|
|
||||||
|
|
||||||
public UrlToCheck(String id, String url) {
|
|
||||||
this.id = id;
|
|
||||||
this.url = url;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getId() {
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setId(String id) {
|
|
||||||
this.id = id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getUrl() {
|
|
||||||
return url;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setUrl(String url) {
|
|
||||||
this.url = url;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return "{\"id\":\"" + id + "\",\"url\":\"" + url + "\"}\n";
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,41 +0,0 @@
|
||||||
package eu.openaire.urls_controller.payloads.requests;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
|
||||||
import eu.openaire.urls_controller.models.Payload;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
|
|
||||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
|
||||||
public class UrlsRequest {
|
|
||||||
|
|
||||||
@JsonProperty("payloads")
|
|
||||||
private List<Payload> payloads;
|
|
||||||
|
|
||||||
public void UrlsResponse() {
|
|
||||||
}
|
|
||||||
|
|
||||||
public void UrlsResponse(List<Payload> payloads) {
|
|
||||||
this.payloads = payloads;
|
|
||||||
}
|
|
||||||
|
|
||||||
public UrlsRequest get() {
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<Payload> getUrlsToCheck() {
|
|
||||||
return payloads;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setUrlsToCheck(List<Payload> payloads) {
|
|
||||||
this.payloads = payloads;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return "UrlsRequest{" +
|
|
||||||
"payloads=" + payloads +
|
|
||||||
'}';
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,38 +0,0 @@
|
||||||
package eu.openaire.urls_controller.payloads.responces;
|
|
||||||
|
|
||||||
import eu.openaire.urls_controller.models.UrlToCheck;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class UrlsResponse {
|
|
||||||
|
|
||||||
private List<UrlToCheck> urlsToCheck;
|
|
||||||
|
|
||||||
public UrlsResponse() {
|
|
||||||
}
|
|
||||||
|
|
||||||
public UrlsResponse(List<UrlToCheck> urlsToCheck) {
|
|
||||||
this.urlsToCheck = urlsToCheck;
|
|
||||||
}
|
|
||||||
|
|
||||||
public UrlsResponse get() {
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<UrlToCheck> getUrlsToCheck() {
|
|
||||||
return urlsToCheck;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setUrlsToCheck(List<UrlToCheck> urlToCheck) {
|
|
||||||
this.urlsToCheck = urlToCheck;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
StringBuilder sb = new StringBuilder(urlsToCheck.size());
|
|
||||||
for ( UrlToCheck urlToCheck : urlsToCheck ) {
|
|
||||||
sb.append(urlToCheck);
|
|
||||||
}
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1,170 @@
|
||||||
|
package eu.openaire.urls_controller.util;
|
||||||
|
|
||||||
|
import com.google.common.collect.HashMultimap;
|
||||||
|
import eu.openaire.urls_controller.models.Task;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.boot.configurationprocessor.json.JSONException;
|
||||||
|
import org.springframework.boot.configurationprocessor.json.JSONObject;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.Scanner;
|
||||||
|
|
||||||
|
public class FileUtils {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(FileUtils.class);
|
||||||
|
|
||||||
|
private static Scanner inputScanner = null;
|
||||||
|
private static int fileIndex = 0;
|
||||||
|
private static int unretrievableInputLines = 0;
|
||||||
|
private static int duplicateIdUrlEntries = 0;
|
||||||
|
public static int jsonBatchSize = 3000;
|
||||||
|
private static final String utf8Charset = "UTF-8";
|
||||||
|
public static String inputFileFullPath;
|
||||||
|
private static String userDir = System.getProperty("user.dir") + File.separator;
|
||||||
|
|
||||||
|
public FileUtils()
|
||||||
|
{
|
||||||
|
inputFileFullPath = userDir + "src" + File.separator + "main" + File.separator + "resources";
|
||||||
|
String resourceFileName = "testInputFiles" + File.separator + "orderedList1000.json";
|
||||||
|
inputFileFullPath += File.separator + resourceFileName;
|
||||||
|
InputStream inputStream = getClass().getClassLoader().getResourceAsStream(resourceFileName);
|
||||||
|
if ( inputStream == null ) {
|
||||||
|
logger.error("No resourceFile was found with name \"" + resourceFileName + "\".");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.debug("Going to retrieve the data from the inputResourceFile: " + resourceFileName);
|
||||||
|
|
||||||
|
FileUtils.inputScanner = new Scanner(inputStream, utf8Charset);
|
||||||
|
fileIndex = 0; // Re-initialize the file-number-pointer.
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// This is currently not used, but it may be useful in a future scenario.
|
||||||
|
private static long getInputFileLinesNum()
|
||||||
|
{
|
||||||
|
long numOfLines = 0;
|
||||||
|
try {
|
||||||
|
numOfLines = Files.lines(Paths.get(inputFileFullPath)).count();
|
||||||
|
logger.debug("The numOfLines in the inputFile is " + numOfLines);
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.error("Could not retrieve the numOfLines. " + e);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return numOfLines;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method decodes a Jason String into its members.
|
||||||
|
* @param jsonLine String
|
||||||
|
* @return HashMap<String,String>
|
||||||
|
*/
|
||||||
|
public static Task jsonDecoder(String jsonLine)
|
||||||
|
{
|
||||||
|
// Get ID and url and put them in the HashMap
|
||||||
|
String idStr = null;
|
||||||
|
String urlStr = null;
|
||||||
|
try {
|
||||||
|
JSONObject jObj = new JSONObject(jsonLine); // Construct a JSONObject from the retrieved jsonLine.
|
||||||
|
idStr = jObj.get("id").toString();
|
||||||
|
urlStr = jObj.get("url").toString();
|
||||||
|
} catch (JSONException je) {
|
||||||
|
logger.warn("JSONException caught when tried to parse and extract values from jsonLine: \t" + jsonLine, je);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( urlStr.isEmpty() ) {
|
||||||
|
if ( !idStr.isEmpty() ) // If we only have the id, then go and log it.
|
||||||
|
logger.warn("The url was not found for id: \"" + idStr + "\"");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Task(idStr, urlStr);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method parses a Json file and extracts the urls, along with the IDs.
|
||||||
|
* @return HashMultimap<String, String>
|
||||||
|
*/
|
||||||
|
public static HashMultimap<String, String> getNextIdUrlPairBatchFromJson()
|
||||||
|
{
|
||||||
|
Task inputIdUrlTuple;
|
||||||
|
int expectedPathsPerID = 5;
|
||||||
|
int expectedIDsPerBatch = jsonBatchSize / expectedPathsPerID;
|
||||||
|
|
||||||
|
HashMultimap<String, String> idAndUrlMappedInput = HashMultimap.create(expectedIDsPerBatch, expectedPathsPerID);
|
||||||
|
|
||||||
|
int curBeginning = fileIndex;
|
||||||
|
|
||||||
|
while ( inputScanner.hasNextLine() && (fileIndex < (curBeginning + jsonBatchSize)) )
|
||||||
|
{// While (!EOF) and inside the current url-batch, iterate through lines.
|
||||||
|
|
||||||
|
//logger.debug("fileIndex: " + FileUtils.fileIndex); // DEBUG!
|
||||||
|
|
||||||
|
// Take each line, remove potential double quotes.
|
||||||
|
String retrievedLineStr = inputScanner.nextLine();
|
||||||
|
//logger.debug("Loaded from inputFile: " + retrievedLineStr); // DEBUG!
|
||||||
|
|
||||||
|
fileIndex ++;
|
||||||
|
|
||||||
|
if ( retrievedLineStr.isEmpty() ) {
|
||||||
|
unretrievableInputLines ++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( (inputIdUrlTuple = jsonDecoder(retrievedLineStr)) == null ) { // Decode the jsonLine and take the two attributes.
|
||||||
|
logger.warn("A problematic inputLine found: \t" + retrievedLineStr);
|
||||||
|
unretrievableInputLines ++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( !idAndUrlMappedInput.put(inputIdUrlTuple.getId(), inputIdUrlTuple.getUrl()) ) { // We have a duplicate url in the input.. log it here as we cannot pass it through the HashMultimap. It's possible that this as well as the original might be/give a docUrl.
|
||||||
|
duplicateIdUrlEntries ++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return idAndUrlMappedInput;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method returns the number of (non-heading, non-empty) lines we have read from the inputFile.
|
||||||
|
* @return loadedUrls
|
||||||
|
*/
|
||||||
|
public static int getCurrentlyLoadedUrls() // In the end, it gives the total number of urls we have processed.
|
||||||
|
{
|
||||||
|
return FileUtils.fileIndex - FileUtils.unretrievableInputLines;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method checks if there is no more input-data and returns true in that case.
|
||||||
|
* Otherwise, it returns false, if there is more input-data to be loaded.
|
||||||
|
* A "RuntimeException" is thrown if no input-urls were retrieved in general.
|
||||||
|
* @param isEmptyOfData
|
||||||
|
* @param isFirstRun
|
||||||
|
* @return finished loading / not finished
|
||||||
|
* @throws RuntimeException
|
||||||
|
*/
|
||||||
|
public static boolean isFinishedLoading(boolean isEmptyOfData, boolean isFirstRun)
|
||||||
|
{
|
||||||
|
if ( isEmptyOfData ) {
|
||||||
|
if ( isFirstRun )
|
||||||
|
logger.error("Could not retrieve any urls from the inputFile!");
|
||||||
|
else
|
||||||
|
logger.debug("Done loading " + FileUtils.getCurrentlyLoadedUrls() + " urls from the inputFile.");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue