2021-03-16 14:25:15 +01:00
package eu.openaire.urls_controller.controllers ;
2023-02-21 14:36:35 +01:00
import eu.openaire.urls_controller.models.UrlReport ;
2021-06-22 04:38:48 +02:00
import eu.openaire.urls_controller.payloads.requests.WorkerReport ;
2023-02-21 14:36:35 +01:00
import eu.openaire.urls_controller.services.UrlsService ;
2021-03-16 14:25:15 +01:00
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
2022-01-30 21:14:52 +01:00
import org.springframework.beans.factory.annotation.Autowired ;
import org.springframework.beans.factory.annotation.Value ;
2021-06-22 04:38:48 +02:00
import org.springframework.http.HttpStatus ;
2021-03-16 14:25:15 +01:00
import org.springframework.http.ResponseEntity ;
2021-06-22 04:38:48 +02:00
import org.springframework.web.bind.annotation.* ;
2021-03-16 14:25:15 +01:00
2021-11-30 17:23:27 +01:00
import javax.servlet.http.HttpServletRequest ;
2022-01-30 21:14:52 +01:00
import java.util.List ;
2021-12-10 20:47:58 +01:00
import java.util.regex.Pattern ;
2021-03-16 14:25:15 +01:00
2022-02-02 19:19:46 +01:00
2021-03-16 14:25:15 +01:00
@RestController
@RequestMapping ( " /urls " )
2023-03-13 11:39:39 +01:00
public class UrlsController {
2021-03-16 14:25:15 +01:00
2023-03-13 11:39:39 +01:00
private static final Logger logger = LoggerFactory . getLogger ( UrlsController . class ) ;
2021-03-16 14:25:15 +01:00
2022-01-30 21:14:52 +01:00
@Autowired
2023-02-21 14:36:35 +01:00
private UrlsService urlsService ;
2022-01-30 21:14:52 +01:00
2022-11-10 16:18:21 +01:00
2021-12-10 20:47:58 +01:00
private static final Pattern MALICIOUS_INPUT_STRING = Pattern . compile ( " .*[';` \" ]+.* " ) ;
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.assignmentLimit} " )
2022-01-30 21:14:52 +01:00
private int assignmentLimit ;
2021-03-16 14:25:15 +01:00
2022-02-02 19:19:46 +01:00
2021-03-16 14:25:15 +01:00
@GetMapping ( " " )
2023-03-07 15:55:41 +01:00
public ResponseEntity < ? > getAssignments ( @RequestParam String workerId , @RequestParam int workerAssignmentsLimit ) {
2021-03-16 14:25:15 +01:00
2021-12-10 20:47:58 +01:00
// As the Impala-driver is buggy and struggles to support parameterized queries in some types of prepared-statements, we have to sanitize the "workerId" ourselves.
if ( MALICIOUS_INPUT_STRING . matcher ( workerId ) . matches ( ) ) {
String errorMsg = " Possibly malicious \" workerId \" received: " + workerId ;
logger . error ( errorMsg ) ;
return ResponseEntity . status ( HttpStatus . FORBIDDEN ) . body ( errorMsg ) ;
}
2022-01-30 21:14:52 +01:00
logger . info ( " Worker with id: \" " + workerId + " \" , requested " + workerAssignmentsLimit + " assignments. The assignments-limit of the controller is: " + assignmentLimit ) ;
2021-03-16 14:25:15 +01:00
2021-11-09 22:59:27 +01:00
// Sanitize the "assignmentsLimit". Do not let an overload happen in the Controller's or the Impala's server.
int assignmentsLimit = workerAssignmentsLimit ;
if ( assignmentsLimit = = 0 ) {
String errorMsg = " The given \" workerAssignmentsLimit \" was ZERO! " ;
logger . error ( errorMsg ) ;
return ResponseEntity . status ( HttpStatus . BAD_REQUEST ) . body ( errorMsg ) ;
2022-01-30 21:14:52 +01:00
} else if ( assignmentsLimit > assignmentLimit ) {
logger . warn ( " The given \" workerAssignmentsLimit \" ( " + workerAssignmentsLimit + " ) was larger than the Controller's limit ( " + assignmentLimit + " ). Will use the Controller's limit. " ) ;
assignmentsLimit = assignmentLimit ;
2021-11-09 22:59:27 +01:00
}
2021-08-05 14:43:37 +02:00
2023-03-07 15:55:41 +01:00
return urlsService . getAssignments ( workerId , assignmentsLimit ) ;
2021-03-16 14:25:15 +01:00
}
2022-02-02 19:19:46 +01:00
2021-06-22 04:38:48 +02:00
@PostMapping ( " addWorkerReport " )
2021-11-30 17:23:27 +01:00
public ResponseEntity < ? > addWorkerReport ( @RequestBody WorkerReport workerReport , HttpServletRequest request ) {
2021-06-22 04:38:48 +02:00
2021-11-09 22:59:27 +01:00
if ( workerReport = = null ) {
String errorMsg = " No \" WorkerReport \" was given! " ;
logger . error ( errorMsg ) ;
return ResponseEntity . status ( HttpStatus . BAD_REQUEST ) . body ( errorMsg ) ;
}
2021-12-10 20:47:58 +01:00
String curWorkerId = workerReport . getWorkerId ( ) ;
if ( curWorkerId = = null ) {
String errorMsg = " No \" workerId \" was included inside the \" WorkerReport \" ! " ;
logger . error ( errorMsg ) ;
return ResponseEntity . status ( HttpStatus . BAD_REQUEST ) . body ( errorMsg ) ;
}
// As the Impala-driver is buggy and struggles to support parameterized queries in some types of prepared-statements, we have to sanitize the "workerId" ourselves.
if ( MALICIOUS_INPUT_STRING . matcher ( curWorkerId ) . matches ( ) ) {
String errorMsg = " Possibly malicious \" workerId \" received: " + curWorkerId ;
logger . error ( errorMsg ) ;
return ResponseEntity . status ( HttpStatus . FORBIDDEN ) . body ( errorMsg ) ;
}
2022-04-04 23:01:44 +02:00
int sizeOUrlReports = 0 ;
2021-11-09 22:59:27 +01:00
List < UrlReport > urlReports = workerReport . getUrlReports ( ) ;
2022-04-04 23:01:44 +02:00
if ( ( urlReports = = null ) | | ( ( sizeOUrlReports = urlReports . size ( ) ) = = 0 ) ) {
2021-12-10 20:47:58 +01:00
String errorMsg = " The given \" WorkerReport \" from worker with ID \" " + curWorkerId + " \" was empty (without any UrlReports)! " ;
2021-11-09 22:59:27 +01:00
logger . error ( errorMsg ) ;
return ResponseEntity . status ( HttpStatus . BAD_REQUEST ) . body ( errorMsg ) ;
}
2021-06-22 04:38:48 +02:00
2021-12-06 19:18:30 +01:00
long curReportAssignments = workerReport . getAssignmentRequestCounter ( ) ;
2022-04-04 23:01:44 +02:00
logger . info ( " Received the WorkerReport for batch-assignments_ " + curReportAssignments + " , from the worker with id: " + curWorkerId + " . It contains " + sizeOUrlReports + " urlReports. Going to request the fullTexts from the Worker and insert the UrlReports into the database. " ) ;
2021-06-22 04:38:48 +02:00
2023-02-21 14:36:35 +01:00
return urlsService . addWorkerReport ( curWorkerId , curReportAssignments , urlReports , sizeOUrlReports , request ) ;
2021-11-09 22:59:27 +01:00
}
2021-03-16 14:25:15 +01:00
}