uoa-validator-api/src/main/java/eu/dnetlib/validatorapi/controllers/ValidationController.java

524 lines
24 KiB
Java

package eu.dnetlib.validatorapi.controllers;
import eu.dnetlib.validator2.validation.XMLApplicationProfile;
import eu.dnetlib.validator2.validation.guideline.Guideline;
import eu.dnetlib.validator2.validation.guideline.openaire.*;
import eu.dnetlib.validatorapi.entities.RuleInfo;
import eu.dnetlib.validatorapi.entities.ValidationJob;
import eu.dnetlib.validatorapi.entities.ValidationRuleResult;
import eu.dnetlib.validatorapi.repositories.ValidationIssueRepository;
import eu.dnetlib.validatorapi.repositories.ValidationJobRepository;
import eu.dnetlib.validatorapi.repositories.ValidationResultRepository;
import eu.dnetlib.validatorapi.routes.OaiPmhRoute;
import org.apache.camel.CamelContext;
import org.apache.camel.ProducerTemplate;
import org.apache.camel.builder.RouteBuilder;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.*;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
@RestController
@CrossOrigin(origins = "*")
public class ValidationController {
// Per-instance log4j2 logger, named after the runtime class of this controller.
private final Logger log = LogManager.getLogger(this.getClass());
// Repository the validation endpoints use to persist ValidationJob entities.
private final ValidationJobRepository validationJobRepository;
// Repository used to persist per-rule ValidationRuleResult entries and to query a job's status.
private final ValidationResultRepository validationResultRepository;
// Injected through the constructor; in the visible code it is only referenced from
// commented-out issue-saving logic.
private final ValidationIssueRepository validationIssueRepository;
// Camel context used to register harvesting routes and to create producer templates.
// Field-injected by Spring (package-private visibility).
@Autowired
CamelContext camelContext;
/**
 * Creates the controller and stores the repositories it works with.
 *
 * @param validationJobRepository    repository for {@link ValidationJob} entities
 * @param validationResultRepository repository for {@link ValidationRuleResult} entities
 * @param validationIssueRepository  repository for validation issues
 */
@Autowired
public ValidationController(ValidationJobRepository validationJobRepository,
                            ValidationResultRepository validationResultRepository,
                            ValidationIssueRepository validationIssueRepository) {
    this.validationIssueRepository = validationIssueRepository;
    this.validationResultRepository = validationResultRepository;
    this.validationJobRepository = validationJobRepository;
}
/**
 * Legacy endpoint: issues an OAI-PMH {@code ListRecords} request against {@code baseURL},
 * validates every record collected by {@code getNumberOfRecords} against the selected
 * OpenAIRE profile, stores one {@link ValidationRuleResult} per rule and record, and finally
 * stores the {@link ValidationJob} with its progress, end date, record count and score.
 *
 * <p>The FAIR profile is only used to decide whether the guideline name is known and as a
 * fallback for the job's guidelines label; records are validated only when an OpenAIRE
 * profile exists for the given name.
 *
 * @param guidelinesProfileName name of the guidelines profile to validate against
 * @param baseURL               base URL of the OAI-PMH repository
 * @param numberOfRecords       value stored on the validation job
 * @param set                   optional OAI-PMH set to restrict the harvest to
 * @param metadataPrefix        OAI-PMH metadata prefix to request
 * @param batchSize             record limit handed to {@code getNumberOfRecords}
 * @throws IllegalArgumentException if no profile is known for {@code guidelinesProfileName}
 */
@RequestMapping(value = {"/realValidator-old"}, method = RequestMethod.GET)
public void validateRealOAIPMH(@RequestParam(name = "guidelines") String guidelinesProfileName,
        @RequestParam(name = "baseUrl", defaultValue = "http://repositorium.sdum.uminho.pt/oai/request") String baseURL,
        @RequestParam(name = "numberOfRecords", defaultValue = "10") int numberOfRecords,
        @RequestParam(name = "set", required = false) String set,
        @RequestParam(name = "metadataPrefix", defaultValue = "oai_dc") String metadataPrefix,
        @RequestParam(name = "batchsize", defaultValue = "50") int batchSize) {
    AbstractOpenAireProfile profile = initializeOpenAireProfile(guidelinesProfileName);
    AbstractOpenAireProfile fairProfile = initializeFairProfile(guidelinesProfileName);
    if (profile == null && fairProfile == null) {
        log.error("Exception: No valid guidelines");
        // The exception has to be thrown, otherwise the code below dereferences a null profile.
        throw new IllegalArgumentException("Validation Job stopped unexpectedly. No valid guidelines were provided.");
    }

    ValidationJob validationJob = new ValidationJob(baseURL, numberOfRecords);
    // Only one of the two profiles may exist (e.g. a FAIR-only guideline name).
    validationJob.guidelines = (profile != null ? profile : fairProfile).name();
    validationJobRepository.save(validationJob);
    log.info("Initial validation job id " + validationJob.id);

    int record = 0;
    double resultSum = 0;
    try {
        DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();

        // Request parameters arrive decoded, so they are re-encoded before being put into the query.
        String url = baseURL + "?verb=ListRecords&metadataPrefix="
                + java.net.URLEncoder.encode(metadataPrefix, "UTF-8");
        if (set != null) {
            url += "&set=" + java.net.URLEncoder.encode(set, "UTF-8");
        }

        HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
        Document oaipmhResponse;
        try (InputStream responseStream = conn.getInputStream()) {
            oaipmhResponse = parseResponse(responseStream);
        }

        //TODO: follow other approach if records are more than 100!!!
        List<String> records = new ArrayList<>();
        getNumberOfRecords(baseURL, batchSize, oaipmhResponse, records, 0);

        for (String recordXml : records) {
            Document doc = db.parse(new InputSource(new StringReader(recordXml)));
            if (profile != null) {
                XMLApplicationProfile.ValidationResult validationResult = profile.validate("id", doc); //what id is that?
                Map<String, Guideline.Result> results = validationResult.results();
                // The record URL is the same for every rule of this record.
                String recordUrl = extractRecordUrl(doc, "identifier");
                for (Map.Entry<String, Guideline.Result> entry : results.entrySet()) {
                    Guideline.Result engineResult = entry.getValue();
                    String ruleName = entry.getKey();
                    ValidationRuleResult validationRuleResult = constructValidationRuleResult(validationJob.id, recordUrl,
                            ruleName, profile, engineResult);
                    resultSum += engineResult.score();
                    validationResultRepository.save(validationRuleResult);
                }
                // Count each validated record exactly once.
                record++;
            }
        }
        validationJob.progress = "COMPLETED";
    } catch (Exception e) {
        log.error("Validation job stopped unexpectedly." + e.getMessage(), e);
        validationJob.progress = "STOPPED";
    } finally {
        validationJob.endDate = new Date();
        validationJob.recordsTested = record;
        // Avoid storing NaN when no record was validated.
        validationJob.score = record == 0 ? 0.0 : resultSum / record;
        log.info("Saving validation job " + validationJob.recordsTested);
        validationJobRepository.save(validationJob);
    }
}
/**
 * Serializes the {@code record} elements of an OAI-PMH {@code ListRecords} response into
 * {@code records}, following {@code resumptionToken}s to further pages until
 * {@code batchSize} records have been collected or the repository reports no more pages.
 *
 * <p>Records of the initial response are collected first; further pages are requested only
 * while the limit has not been reached.
 *
 * @param baseURL          base URL of the OAI-PMH repository, used for follow-up requests
 * @param batchSize        maximum number of records to collect
 * @param oaipmhResponse   the already parsed first response page
 * @param records          output list receiving one serialized XML string per record
 * @param recordsRetrieved number of records already counted towards {@code batchSize}
 * @throws Exception if a follow-up request, XML parsing or serialization fails
 */
private void getNumberOfRecords(String baseURL, int batchSize, Document oaipmhResponse, List<String> records, int recordsRetrieved) throws Exception {
    // Both objects are reusable, so they are created once instead of once per page/record.
    XPathExpression recordsExpression = XPathFactory.newInstance().newXPath().compile("//record");
    Transformer serializer = TransformerFactory.newInstance().newTransformer();

    Document page = oaipmhResponse;
    while (page != null && recordsRetrieved < batchSize) {
        NodeList recordNodes = (NodeList) recordsExpression.evaluate(page, XPathConstants.NODESET);
        for (int i = 0; i < recordNodes.getLength() && recordsRetrieved < batchSize; ++i) {
            StringWriter stringWriter = new StringWriter();
            serializer.transform(new DOMSource(recordNodes.item(i)), new StreamResult(stringWriter));
            records.add(stringWriter.toString());
            recordsRetrieved++;
        }
        if (recordsRetrieved >= batchSize) {
            break; // limit reached, no need to request another page
        }

        Element resumptionToken = (Element) page.getElementsByTagName("resumptionToken").item(0);
        String token = resumptionToken == null ? "" : resumptionToken.getTextContent().trim();
        if (token.isEmpty()) {
            break; // no more records to retrieve
        }

        // The token is opaque and may contain characters that are not valid in a query string.
        String url = baseURL + "?verb=ListRecords&resumptionToken=" + java.net.URLEncoder.encode(token, "UTF-8");
        HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
        try (InputStream responseStream = conn.getInputStream()) {
            page = parseResponse(responseStream);
        }
    }
}
/**
 * Creates a {@link ValidationJob} and registers an {@link OaiPmhRoute} on the Camel context
 * for the given repository and guidelines profile.
 *
 * <p>If the route cannot be registered, the job is marked {@code STOPPED}, given an end
 * date, a score of zero and the status reported by the result repository, and stored again.
 *
 * @param guidelinesProfileName name of the guidelines profile to validate against
 * @param baseURL               base URL of the OAI-PMH repository
 * @param numberOfRecords       value stored on the job and passed to the route
 * @param set                   accepted for API compatibility; not used by this method
 * @throws IllegalArgumentException if the guideline name is unknown, or if no OpenAIRE
 *                                  profile / metadata prefix is mapped to it
 */
@RequestMapping(value = {"/realValidator"}, method = RequestMethod.GET)
public void validateWithApacheCamel(@RequestParam(name = "guidelines") String guidelinesProfileName,
        @RequestParam(name = "baseUrl", defaultValue = "http://repositorium.sdum.uminho.pt/oai/request") String baseURL,
        @RequestParam(name = "numberOfRecords", defaultValue = "10") int numberOfRecords,
        @RequestParam(name = "set", required = false) String set) {
    AbstractOpenAireProfile profile = initializeOpenAireProfile(guidelinesProfileName);
    AbstractOpenAireProfile fairProfile = initializeFairProfile(guidelinesProfileName);
    String metadataPrefix = initializeMetadataPrefix(guidelinesProfileName);

    if (profile == null && fairProfile == null) {
        log.error("Exception: No valid guidelines " + guidelinesProfileName + ".");
        // The exception has to be thrown, otherwise the code below dereferences a null profile.
        throw new IllegalArgumentException("Validation Job stopped unexpectedly. No valid guidelines " +
                guidelinesProfileName + " were provided.");
    }
    if (profile == null || metadataPrefix == null) {
        // A FAIR-only guideline name has neither an OpenAIRE profile nor a metadata prefix,
        // so neither the job label nor the harvesting URL could be built.
        log.error("Exception: Guidelines " + guidelinesProfileName + " cannot be harvested.");
        throw new IllegalArgumentException("Validation Job stopped unexpectedly. Guidelines " +
                guidelinesProfileName + " have no OpenAIRE profile or metadata prefix mapped to them.");
    }

    ValidationJob validationJob = new ValidationJob(baseURL, numberOfRecords);
    validationJob.guidelines = profile.name();
    validationJobRepository.save(validationJob);
    log.info("Initial validation job id " + validationJob.id);

    try {
        // NOTE(review): baseURL normally already carries its own scheme, so the endpoint URI
        // becomes "oaipmh://http://..." - confirm this is what the route expects.
        RouteBuilder oaiPmhRouteBuilder =
                new OaiPmhRoute("oaipmh://" + baseURL + "?verb=ListRecords&metadataPrefix=" + metadataPrefix,
                        profile, validationJob, numberOfRecords);
        camelContext.addRoutes(oaiPmhRouteBuilder);
    } catch (Exception e) {
        log.error("Validation job stopped unexpectedly." + e.getMessage(), e);
        validationJob.progress = "STOPPED";
        validationJob.endDate = new Date();
        // No score was accumulated in this method; store zero instead of the NaN that 0.0 / 0 yields.
        validationJob.score = 0.0;
        validationJob.status = validationResultRepository.getStatus(validationJob.id);
        validationJobRepository.save(validationJob);
    }
}
/**
 * Sends a request to the {@code direct:getResponse} Camel endpoint with an {@code endpoint}
 * header pointing at the repository's OAI-PMH {@code ListSets} URI and returns the reply.
 *
 * @param baseURL base URL of the OAI-PMH repository
 * @return the reply body converted to a {@code String}
 * @throws Exception if the exchange fails or the producer template cannot be stopped
 */
@RequestMapping(value = {"/getSets"}, method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
public String getSets(@RequestParam(name = "baseUrl", defaultValue = "http://repositorium.sdum.uminho.pt/oai/request") String baseURL
) throws Exception {
    ProducerTemplate producerTemplate = camelContext.createProducerTemplate();
    try {
        return producerTemplate.requestBodyAndHeader("direct:getResponse", null, "endpoint", "oaipmh://" + baseURL + "?verb=ListSets", String.class);
    } finally {
        // A template is created per request, so it must be stopped to release its resources.
        producerTemplate.stop();
    }
}
/**
 * Demo endpoint that answers with a fixed JSON document listing three placeholder sets.
 *
 * @return the hard-coded JSON text
 */
@RequestMapping(value = {"/demo"}, produces = MediaType.APPLICATION_JSON_VALUE, method = RequestMethod.POST)
public String demo() {
    // Static example payload; no sets are actually looked up here.
    return "{ \"sets\": [\"Set1\", \"Set2\", \"Set3\"] }";
}
/**
 * Fills an existing {@link ValidationRuleResult} from one entry of a profile's result map.
 *
 * @param validationJob        job the result belongs to; its id is copied
 * @param validationRuleResult result object to populate
 * @param entry                map entry whose key names the rule and whose value is the
 *                             {@link Guideline.Result} produced for it
 * @param profile              profile used to look up the rule's weight
 * @param document             record document the {@code dc:identifier} is read from
 */
private void constructValidationJobResult(ValidationJob validationJob, ValidationRuleResult validationRuleResult,
        Map.Entry<?, ?> entry, AbstractOpenAireProfile profile, Document document) {
    String ruleName = entry.getKey().toString();
    validationRuleResult.validationJobId = validationJob.id;
    validationRuleResult.ruleName = ruleName;
    validationRuleResult.ruleWeight = profile.guideline(ruleName).getWeight();
    validationRuleResult.recordUrl = extractRecordUrl(document, "dc:identifier");

    Guideline.Result engineResult = (Guideline.Result) entry.getValue();
    validationRuleResult.score = engineResult.score();
    validationRuleResult.status = engineResult.status().toString();
    validationRuleResult.internalError = engineResult.internalError();
}
/**
 * Creates a new {@link ValidationRuleResult} from one entry of a profile's result map.
 *
 * <p>Delegates to the populating overload so both variants fill the same fields.
 *
 * @param validationJob job the result belongs to
 * @param entry         map entry whose key names the rule and whose value is the
 *                      {@link Guideline.Result} produced for it
 * @param profile       profile used to look up the rule's weight
 * @param document      record document the {@code dc:identifier} is read from
 * @return the newly created and populated result
 */
private ValidationRuleResult constructValidationJobResult(ValidationJob validationJob,
        Map.Entry<?, ?> entry, AbstractOpenAireProfile profile, Document document) {
    ValidationRuleResult validationRuleResult = new ValidationRuleResult();
    constructValidationJobResult(validationJob, validationRuleResult, entry, profile, document);
    return validationRuleResult;
}
/* private void saveValidationIssues(int validationJobId, String recordUrl, String ruleName, Guideline.Result engineResult) {
for (String error:engineResult.errors()) {
*//*System.out.println("11111");*//*
ValidationIssue validationIssue = new ValidationIssue();
validationIssue.validationJobId = validationJobId;
validationIssue.ruleName = ruleName;
validationIssue.recordUrl = recordUrl;
validationIssue.issueType = "ERROR";
validationIssue.issueText = error;
validationIssueRepository.save(validationIssue);
*//*
System.out.println(validationIssue);
*//*
}
for (String warning: engineResult.warnings()){
*//*
System.out.println("22222");
*//*
ValidationIssue validationIssue = new ValidationIssue();
validationIssue.validationJobId = validationJobId;
validationIssue.ruleName = ruleName;
validationIssue.recordUrl = recordUrl;
validationIssue.issueType = "WARNING";
validationIssue.issueText = warning;
validationIssueRepository.save(validationIssue);
*//*
System.out.println(validationIssue);
*//*
}
}*/
/**
 * Builds the {@link ValidationRuleResult} for one rule evaluated on one record.
 *
 * @param validationJobId id of the job the result belongs to
 * @param recordUrl       identifier of the validated record
 * @param ruleName        name of the evaluated rule
 * @param profile         profile used to look up the rule's weight
 * @param engineResult    outcome reported for the rule
 * @return a new result carrying job id, record, rule name and weight, score, status and
 *         internal error
 */
private ValidationRuleResult constructValidationRuleResult(int validationJobId, String recordUrl, String ruleName,
        AbstractOpenAireProfile profile, Guideline.Result engineResult) {
    ValidationRuleResult ruleResult = new ValidationRuleResult();

    // Identification of the result.
    ruleResult.validationJobId = validationJobId;
    ruleResult.recordUrl = recordUrl;
    ruleResult.ruleName = ruleName;

    // Values taken from the profile and from the engine's outcome.
    ruleResult.ruleWeight = profile.guideline(ruleName).getWeight();
    ruleResult.score = engineResult.score();
    ruleResult.status = engineResult.status().toString();
    ruleResult.internalError = engineResult.internalError();

    return ruleResult;
}
//TODO consider throwing exception
/**
 * Returns the text content of the first element named {@code xmlField} in the document.
 *
 * @param doc      document to search
 * @param xmlField tag name to look for
 * @return the first match's text content, or {@code "-"} when there is no such element
 */
private String extractRecordUrl(Document doc, String xmlField) {
    NodeList matches = doc.getElementsByTagName(xmlField);
    if (matches.getLength() == 0) {
        return "-";
    }
    return matches.item(0).getTextContent();
}
/**
 * Maps a guidelines profile name to a new OpenAIRE profile instance.
 *
 * @param guidelinesProfileName profile name; must not be {@code null}
 * @return a new profile for the three known names, otherwise {@code null}
 */
private AbstractOpenAireProfile initializeOpenAireProfile(String guidelinesProfileName) {
    switch (guidelinesProfileName) {
        case "dataArchiveGuidelinesV2Profile":
            return new DataArchiveGuidelinesV2Profile();
        case "literatureGuidelinesV3Profile":
            return new LiteratureGuidelinesV3Profile();
        case "literatureGuidelinesV4Profile":
            return new LiteratureGuidelinesV4Profile();
        default:
            return null;
    }
}
/**
 * Maps a guidelines profile name to the OAI-PMH metadata prefix requested for it.
 *
 * @param guidelinesProfileName profile name; must not be {@code null}
 * @return {@code "oai_openaire"} or {@code "oai_dc"} for the known names, otherwise {@code null}
 */
private String initializeMetadataPrefix(String guidelinesProfileName) {
    switch (guidelinesProfileName) {
        case "dataArchiveGuidelinesV2Profile":
        case "literatureGuidelinesV4Profile":
            return "oai_openaire";
        case "literatureGuidelinesV3Profile":
            return "oai_dc";
        default:
            return null;
    }
}
/**
 * Maps a guidelines profile name to a new FAIR data guidelines profile.
 *
 * @param guidelinesProfileName profile name; must not be {@code null}
 * @return a new {@link FAIR_Data_GuidelinesProfile} for the two names that have one,
 *         otherwise {@code null}
 */
private AbstractOpenAireProfile initializeFairProfile(String guidelinesProfileName) {
    switch (guidelinesProfileName) {
        case "dataArchiveGuidelinesV2Profile":
        case "fairDataGuidelinesProfile":
            return new FAIR_Data_GuidelinesProfile();
        default:
            return null;
    }
}
/**
 * Parses an XML stream into a DOM {@link Document}.
 *
 * <p>The stream comes from a remote repository, so resolution of external entities and
 * external DTDs is switched off before parsing.
 *
 * @param stream XML input; the caller remains responsible for closing it
 * @return the parsed document
 * @throws Exception if the parser cannot be configured or the input cannot be parsed
 */
private Document parseResponse(InputStream stream)
        throws Exception
{
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    // Harden against XXE: never fetch external entities or external DTDs.
    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    return factory.newDocumentBuilder().parse(stream);
}
/**
 * Validates the records contained in an OAI-PMH response that is posted as the request body.
 *
 * <p>Each {@code record} element is validated against the selected OpenAIRE profile, one
 * {@link ValidationRuleResult} is stored per rule and record, and the {@link ValidationJob}
 * is stored with its progress, end date, record count and score. The FAIR profile is only
 * used to decide whether the guideline name is known and as a fallback for the job's
 * guidelines label.
 *
 * @param guidelinesProfileName name of the guidelines profile to validate against
 * @param baseUrl               value stored on the validation job
 * @param numberOfRecords       value stored on the validation job
 * @param OAIPMHResponse        XML text containing the records to validate
 * @throws IllegalArgumentException if no profile is known for {@code guidelinesProfileName}
 */
@RequestMapping(value = {"/validateOAIPMH"}, method = RequestMethod.POST)
public void validateOAIPMH(@RequestParam(name = "guidelines") String guidelinesProfileName,
        @RequestParam(name = "baseUrl", defaultValue = "localhost") String baseUrl,
        @RequestParam(name = "numberOfRecords", defaultValue = "10") int numberOfRecords,
        @RequestBody String OAIPMHResponse) {
    AbstractOpenAireProfile profile = initializeOpenAireProfile(guidelinesProfileName);
    AbstractOpenAireProfile fairProfile = initializeFairProfile(guidelinesProfileName);
    if (profile == null && fairProfile == null) {
        // Without this guard the guidelines label below would dereference a null profile.
        log.error("Exception: No valid guidelines " + guidelinesProfileName + ".");
        throw new IllegalArgumentException("Validation Job stopped unexpectedly. No valid guidelines " +
                guidelinesProfileName + " were provided.");
    }

    ValidationJob validationJob = new ValidationJob(baseUrl, numberOfRecords);
    // Only one of the two profiles may exist (e.g. a FAIR-only guideline name).
    validationJob.guidelines = (profile != null ? profile : fairProfile).name();
    validationJobRepository.save(validationJob);
    log.info("Initial validation job id " + validationJob.id);

    int record = 0;
    double resultSum = 0;
    try {
        List<String> recordXmls = extractRecordXmls(OAIPMHResponse);
        DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();

        for (String recordXml : recordXmls) {
            Document doc = db.parse(new InputSource(new StringReader(recordXml)));
            if (profile != null) {
                //what id is that?
                XMLApplicationProfile.ValidationResult validationResult = profile.validate("id", doc);
                Map<String, Guideline.Result> results = validationResult.results();
                String recordUrl = "localhost://records/record[" + record + "]"; // silly id
                for (Map.Entry<String, Guideline.Result> entry : results.entrySet()) {
                    Guideline.Result engineResult = entry.getValue();
                    // Shared helper, so results of this endpoint carry the rule weight as well.
                    ValidationRuleResult validationRuleResult = constructValidationRuleResult(validationJob.id,
                            recordUrl, entry.getKey(), profile, engineResult);
                    validationResultRepository.save(validationRuleResult);
                    resultSum += engineResult.score();
                }
            }
            record++;
        }
        validationJob.progress = "COMPLETED";
    } catch (Exception e) {
        log.error("Validation job stopped unexpectedly." + e.getMessage(), e);
        validationJob.progress = "STOPPED";
    } finally {
        validationJob.endDate = new Date();
        log.info("Final validation job " + validationJob.id);
        validationJob.recordsTested = record;
        // Avoid storing NaN when the body contained no record.
        validationJob.score = record == 0 ? 0.0 : resultSum / record;
        validationJobRepository.save(validationJob);
    }
}
/**
 * Extracts every {@code record} element of an XML document as its own XML string.
 *
 * <p>The input is client-supplied, so resolution of external entities and external DTDs is
 * switched off before parsing. Records are serialized indented and without an XML
 * declaration.
 *
 * @param xml XML text to search
 * @return one serialized string per {@code record} element, in document order; empty when
 *         there is none
 * @throws Exception if the XML cannot be parsed or a record cannot be serialized
 */
public List<String> extractRecordXmls(String xml) throws Exception {
    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
    // Harden against XXE: never fetch external entities or external DTDs.
    dbFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    dbFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    dbFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    Document doc = dbFactory.newDocumentBuilder().parse(new InputSource(new StringReader(xml)));

    XPathExpression recordsExpression = XPathFactory.newInstance().newXPath().compile("//record");
    NodeList recordNodes = (NodeList) recordsExpression.evaluate(doc, XPathConstants.NODESET);

    // One transformer serves all records; its output settings do not change between them.
    Transformer xform = TransformerFactory.newInstance().newTransformer();
    xform.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); // optional
    xform.setOutputProperty(OutputKeys.INDENT, "yes"); // optional

    List<String> records = new ArrayList<>(recordNodes.getLength());
    for (int i = 0; i < recordNodes.getLength(); ++i) {
        StringWriter stringWriter = new StringWriter();
        xform.transform(new DOMSource(recordNodes.item(i)), new StreamResult(stringWriter));
        records.add(stringWriter.toString());
    }
    return records;
}
}