2023-04-11 16:41:01 +02:00
package eu.dnetlib.validatorapi.controllers ;
import eu.dnetlib.validator2.validation.XMLApplicationProfile ;
import eu.dnetlib.validator2.validation.guideline.Guideline ;
import eu.dnetlib.validator2.validation.guideline.openaire.* ;
2023-05-29 13:40:25 +02:00
import eu.dnetlib.validatorapi.entities.RuleInfo ;
import eu.dnetlib.validatorapi.entities.ValidationJob ;
import eu.dnetlib.validatorapi.entities.ValidationRuleResult ;
2023-09-13 12:23:27 +02:00
import eu.dnetlib.validatorapi.exceptions.ValidationException ;
2023-11-07 14:06:11 +01:00
import eu.dnetlib.validatorapi.repositories.SummaryValidationJobRepository ;
2023-05-03 14:00:57 +02:00
import eu.dnetlib.validatorapi.repositories.ValidationIssueRepository ;
import eu.dnetlib.validatorapi.repositories.ValidationJobRepository ;
import eu.dnetlib.validatorapi.repositories.ValidationResultRepository ;
2023-07-25 14:59:12 +02:00
import eu.dnetlib.validatorapi.routes.FairOaiPmhRoute2 ;
import eu.dnetlib.validatorapi.routes.SimpleOaiPmhRoute ;
2023-05-29 13:40:25 +02:00
import org.apache.camel.CamelContext ;
2023-11-20 13:36:46 +01:00
import org.apache.camel.Exchange ;
2023-06-27 09:58:55 +02:00
import org.apache.camel.ProducerTemplate ;
2023-05-29 13:40:25 +02:00
import org.apache.camel.builder.RouteBuilder ;
2023-09-13 12:23:27 +02:00
import org.apache.http.HttpStatus ;
2023-04-11 16:41:01 +02:00
import org.apache.logging.log4j.LogManager ;
import org.apache.logging.log4j.Logger ;
import org.springframework.beans.factory.annotation.Autowired ;
2023-06-20 14:42:16 +02:00
import org.springframework.http.MediaType ;
2023-09-13 12:23:27 +02:00
import org.springframework.http.ResponseEntity ;
2023-04-11 16:41:01 +02:00
import org.springframework.web.bind.annotation.* ;
import org.w3c.dom.Document ;
2023-04-25 10:18:14 +02:00
import org.w3c.dom.Element ;
2023-04-11 16:41:01 +02:00
import org.w3c.dom.Node ;
import org.w3c.dom.NodeList ;
import org.xml.sax.InputSource ;
import javax.xml.parsers.DocumentBuilder ;
import javax.xml.parsers.DocumentBuilderFactory ;
import javax.xml.transform.OutputKeys ;
import javax.xml.transform.Transformer ;
import javax.xml.transform.TransformerFactory ;
import javax.xml.transform.dom.DOMSource ;
import javax.xml.transform.stream.StreamResult ;
import javax.xml.xpath.XPath ;
import javax.xml.xpath.XPathConstants ;
import javax.xml.xpath.XPathExpression ;
import javax.xml.xpath.XPathFactory ;
2023-04-25 10:18:14 +02:00
import java.io.InputStream ;
2023-04-11 16:41:01 +02:00
import java.io.StringReader ;
import java.io.StringWriter ;
2023-04-25 10:18:14 +02:00
import java.net.HttpURLConnection ;
import java.net.URL ;
2023-07-24 15:34:40 +02:00
import java.util.* ;
2023-04-11 16:41:01 +02:00
@RestController
@CrossOrigin ( origins = " * " )
public class ValidationController {
private final Logger log = LogManager . getLogger ( this . getClass ( ) ) ;
private final ValidationJobRepository validationJobRepository ;
private final ValidationResultRepository validationResultRepository ;
2023-05-03 14:00:57 +02:00
private final ValidationIssueRepository validationIssueRepository ;
2023-04-11 16:41:01 +02:00
2023-11-07 14:06:11 +01:00
private final SummaryValidationJobRepository summaryValidationJobRepository ;
2023-05-30 11:25:10 +02:00
@Autowired
CamelContext camelContext ;
2023-04-11 16:41:01 +02:00
/**
 * Builds the controller from the four repositories it keeps as final fields.
 *
 * @param validationJobRepository        repository for validation jobs
 * @param validationResultRepository     repository for per-rule validation results
 * @param validationIssueRepository      repository for validation issues
 * @param summaryValidationJobRepository repository passed on to the OAI-PMH route builders
 */
@Autowired
public ValidationController(
        ValidationJobRepository validationJobRepository,
        ValidationResultRepository validationResultRepository,
        ValidationIssueRepository validationIssueRepository,
        SummaryValidationJobRepository summaryValidationJobRepository) {
    this.summaryValidationJobRepository = summaryValidationJobRepository;
    this.validationIssueRepository = validationIssueRepository;
    this.validationResultRepository = validationResultRepository;
    this.validationJobRepository = validationJobRepository;
}
2023-05-29 13:40:25 +02:00
@RequestMapping ( value = { " /realValidator-old " } , method = RequestMethod . GET )
2023-04-25 10:18:14 +02:00
public void validateRealOAIPMH ( @RequestParam ( name = " guidelines " ) String guidelinesProfileName ,
2023-05-03 14:00:57 +02:00
@RequestParam ( name = " baseUrl " , defaultValue = " http://repositorium.sdum.uminho.pt/oai/request " ) String baseURL , //not in use now
@RequestParam ( name = " numberOfRecords " , defaultValue = " 10 " ) int numberOfRecords , //not in use now
@RequestParam ( name = " set " , required = false ) String set , @RequestParam ( name = " metadataPrefix " , defaultValue = " oai_dc " ) String metadataPrefix ,
@RequestParam ( name = " batchsize " , defaultValue = " 50 " ) int batchSize ) {
2023-04-11 16:41:01 +02:00
2023-05-03 14:00:57 +02:00
AbstractOpenAireProfile profile = initializeOpenAireProfile ( guidelinesProfileName ) ;
AbstractOpenAireProfile fairProfile = initializeFairProfile ( guidelinesProfileName ) ;
if ( profile = = null & & fairProfile = = null ) {
log . error ( " Exception: No valid guidelines " ) ;
new Exception ( " Validation Job stopped unexpectedly. No valid guidelines were provided. " ) ;
}
2023-10-02 13:08:47 +02:00
ValidationJob validationJob = new ValidationJob ( baseURL , null , numberOfRecords ) ;
2023-04-25 10:18:14 +02:00
validationJob . guidelines = profile . name ( ) ;
2023-05-03 14:00:57 +02:00
validationJobRepository . save ( validationJob ) ;
2023-06-12 14:48:10 +02:00
System . out . println ( " id " + validationJob . id ) ;
2023-04-25 10:18:14 +02:00
int record = 0 ;
double resultSum = 0 ;
try {
DocumentBuilderFactory dbf = DocumentBuilderFactory . newInstance ( ) ;
DocumentBuilder db = dbf . newDocumentBuilder ( ) ;
String url = baseURL + " ?verb=ListRecords&metadataPrefix= " + metadataPrefix ;
if ( set ! = null ) {
url + = " &set= " + set ;
}
2023-05-03 14:00:57 +02:00
2023-04-25 10:18:14 +02:00
HttpURLConnection conn = ( HttpURLConnection ) new URL ( url ) . openConnection ( ) ;
Document oaipmhResponse = parseResponse ( conn . getInputStream ( ) ) ;
2023-05-29 13:40:25 +02:00
2023-04-25 10:18:14 +02:00
2023-05-03 14:00:57 +02:00
//TODO: follow other approach if records are more than 100!!!
2023-04-25 10:18:14 +02:00
int recordsRetrieved = 0 ;
2023-05-29 13:40:25 +02:00
List < String > records = new ArrayList < String > ( ) ;
// Retrieve additional records using resumptionToken
getNumberOfRecords ( baseURL , batchSize , oaipmhResponse , records , recordsRetrieved ) ;
2023-04-25 10:18:14 +02:00
2023-05-03 14:00:57 +02:00
for ( String recordXml : records ) {
Document doc = db . parse ( new InputSource ( new StringReader ( recordXml ) ) ) ;
2023-04-25 10:18:14 +02:00
2023-05-03 14:00:57 +02:00
if ( profile ! = null ) {
XMLApplicationProfile . ValidationResult validationResult = profile . validate ( " id " , doc ) ; //what id is that?
2023-04-25 10:18:14 +02:00
2023-05-03 14:00:57 +02:00
Map < String , Guideline . Result > results = validationResult . results ( ) ;
for ( Map . Entry entry : results . entrySet ( ) ) {
2023-04-25 10:18:14 +02:00
Guideline . Result engineResult = ( Guideline . Result ) entry . getValue ( ) ;
2023-05-03 14:00:57 +02:00
String recordUrl = extractRecordUrl ( doc , " identifier " ) ;
String ruleName = entry . getKey ( ) . toString ( ) ;
ValidationRuleResult validationRuleResult = constructValidationRuleResult ( validationJob . id , recordUrl ,
ruleName , profile , engineResult ) ;
resultSum + = engineResult . score ( ) ;
2023-04-25 10:18:14 +02:00
2023-05-03 14:00:57 +02:00
validationResultRepository . save ( validationRuleResult ) ;
2023-07-14 13:21:21 +02:00
//saveValidationIssues(validationJob.id, recordUrl, ruleName, engineResult);
2023-04-25 10:18:14 +02:00
}
2023-05-03 14:00:57 +02:00
2023-04-25 10:18:14 +02:00
record + + ;
2023-06-12 14:48:10 +02:00
System . out . println ( record + + ) ;
2023-04-25 10:18:14 +02:00
}
}
2023-06-13 14:57:39 +02:00
validationJob . progress = " COMPLETED " ;
2023-04-25 10:18:14 +02:00
}
catch ( Exception e ) {
2023-09-13 12:23:27 +02:00
log . error ( " Validation job stopped unexpectedly. " + e . getMessage ( ) ) ;
2023-06-13 14:57:39 +02:00
validationJob . progress = " STOPPED " ;
2023-09-13 12:23:27 +02:00
validationJob . exceptionClass = e . getClass ( ) . getCanonicalName ( ) ;
validationJob . exceptionMessage = e . getMessage ( ) ;
2023-04-25 10:18:14 +02:00
} finally {
validationJob . endDate = new Date ( ) ;
2023-05-29 13:40:25 +02:00
//System.out.println("Final validation job "+ validationJob.hashCode());
2023-04-25 10:18:14 +02:00
validationJob . recordsTested = record ;
validationJob . score = resultSum / record ;
2023-06-13 14:57:39 +02:00
log . info ( " Saving validation job " + validationJob . recordsTested ) ;
2023-06-12 14:48:10 +02:00
validationJobRepository . save ( validationJob ) ;
2023-04-25 10:18:14 +02:00
}
//xmlValidationResponse.setRules(resultRules);
//xmlValidationResponse.setFairRules(fairRules);
}
2023-05-29 13:40:25 +02:00
private void getNumberOfRecords ( String baseURL , int batchSize , Document oaipmhResponse , List < String > records , int recordsRetrieved ) throws Exception {
String url ;
HttpURLConnection conn ;
while ( recordsRetrieved < batchSize ) {
Element resumptionToken = ( Element ) oaipmhResponse . getElementsByTagName ( " resumptionToken " ) . item ( 0 ) ;
if ( resumptionToken = = null | | resumptionToken . getTextContent ( ) . isEmpty ( ) ) {
break ; // no more records to retrieve
}
url = baseURL + " ?verb=ListRecords&resumptionToken= " + resumptionToken . getTextContent ( ) ;
conn = ( HttpURLConnection ) new URL ( url ) . openConnection ( ) ;
oaipmhResponse = parseResponse ( conn . getInputStream ( ) ) ;
XPathFactory xfactory = XPathFactory . newInstance ( ) ;
XPath xpath = xfactory . newXPath ( ) ;
XPathExpression recordsExpression = xpath . compile ( " //record " ) ;
NodeList recordNodes = ( NodeList ) recordsExpression . evaluate ( oaipmhResponse , XPathConstants . NODESET ) ;
for ( int i = 0 ; i < recordNodes . getLength ( ) ; + + i ) {
Node element = recordNodes . item ( i ) ;
StringWriter stringWriter = new StringWriter ( ) ;
Transformer xform = TransformerFactory . newInstance ( ) . newTransformer ( ) ;
xform . transform ( new DOMSource ( element ) , new StreamResult ( stringWriter ) ) ;
records . add ( stringWriter . toString ( ) ) ;
}
recordsRetrieved + = records . size ( ) ;
}
}
2023-07-25 14:59:12 +02:00
@RequestMapping ( value = { " /realValidator " } , method = RequestMethod . GET , produces = MediaType . APPLICATION_JSON_VALUE )
2023-09-13 12:23:27 +02:00
public ResponseEntity < ValidationJob > validateWithApacheCamel ( @RequestParam ( name = " guidelines " ) String guidelinesProfileName ,
@RequestParam ( name = " baseUrl " , defaultValue = " http://repositorium.sdum.uminho.pt/oai/request " ) String baseURL , //not in use now
@RequestParam ( name = " numberOfRecords " , defaultValue = " 10 " ) int numberOfRecords ,
2023-10-02 13:08:47 +02:00
@RequestParam ( name = " set " , required = false ) Optional < String > set ) {
2023-05-29 13:40:25 +02:00
AbstractOpenAireProfile profile = initializeOpenAireProfile ( guidelinesProfileName ) ;
AbstractOpenAireProfile fairProfile = initializeFairProfile ( guidelinesProfileName ) ;
2023-07-10 13:11:36 +02:00
String metadataPrefix = initializeMetadataPrefix ( guidelinesProfileName ) ;
2023-05-29 13:40:25 +02:00
if ( profile = = null & & fairProfile = = null ) {
log . error ( " Exception: No valid guidelines " + guidelinesProfileName + " . " ) ;
2023-09-13 12:23:27 +02:00
throw new ValidationException ( " Validation Job stopped unexpectedly. No valid guidelines " +
2023-05-29 13:40:25 +02:00
guidelinesProfileName + " were provided. " ) ;
}
2023-10-02 13:08:47 +02:00
ValidationJob validationJob = new ValidationJob ( baseURL , set , numberOfRecords ) ;
2023-05-29 13:40:25 +02:00
validationJob . guidelines = profile . name ( ) ;
validationJobRepository . save ( validationJob ) ;
2023-06-12 14:48:10 +02:00
log . info ( " Initial validation job id " + validationJob . id ) ;
2023-05-29 13:40:25 +02:00
int record = 0 ;
double resultSum = 0 ;
try {
2023-07-24 15:34:40 +02:00
UUID uuid = UUID . randomUUID ( ) ;
2023-07-25 14:59:12 +02:00
RouteBuilder oaiPmhRouteBuilder ;
2023-07-24 15:34:40 +02:00
2023-07-26 13:19:44 +02:00
String oaiEndpoint = " oaipmh:// " + baseURL + " ?verb=ListRecords&metadataPrefix= " + metadataPrefix ;
2023-10-02 13:08:47 +02:00
2023-07-26 13:19:44 +02:00
if ( set . isPresent ( ) )
oaiEndpoint + = " &set= " + set . get ( ) ;
2023-07-25 14:59:12 +02:00
if ( fairProfile = = null ) {
oaiPmhRouteBuilder =
2023-11-07 14:06:11 +01:00
new SimpleOaiPmhRoute ( oaiEndpoint , profile , validationJob , numberOfRecords , uuid . toString ( ) ,
2023-11-20 13:36:46 +01:00
validationJobRepository , validationResultRepository , summaryValidationJobRepository ) ;
2023-10-02 13:08:47 +02:00
2023-11-20 13:36:46 +01:00
// new SimpleOaiPmhRoute(oaiEndpoint, profile, validationJob, numberOfRecords, uuid.toString());
2023-07-25 14:59:12 +02:00
} else {
oaiPmhRouteBuilder =
2023-11-21 14:19:09 +01:00
// new FairOaiPmhRoute2(oaiEndpoint, profile, validationJob, numberOfRecords, uuid.toString());
new FairOaiPmhRoute2 ( oaiEndpoint , profile , fairProfile , validationJob , numberOfRecords , uuid . toString ( ) ,
validationJobRepository , validationResultRepository , summaryValidationJobRepository ) ;
2023-10-02 13:08:47 +02:00
2023-07-25 14:59:12 +02:00
}
2023-07-14 13:21:21 +02:00
2023-05-29 13:40:25 +02:00
camelContext . addRoutes ( oaiPmhRouteBuilder ) ;
2023-06-20 14:42:16 +02:00
2023-09-13 12:23:27 +02:00
} catch ( Exception e ) {
log . error ( " Validation job stopped unexpectedly. " + e . getMessage ( ) ) ;
2023-06-13 14:57:39 +02:00
validationJob . progress = " STOPPED " ;
2023-05-29 13:40:25 +02:00
validationJob . endDate = new Date ( ) ;
2023-06-13 14:57:39 +02:00
validationJob . score = resultSum / validationJob . recordsTested ;
validationJob . status = validationResultRepository . getStatus ( validationJob . id ) ;
2023-09-13 12:23:27 +02:00
validationJob . exceptionClass = e . getClass ( ) . getCanonicalName ( ) ;
validationJob . exceptionMessage = e . getMessage ( ) ;
2023-05-29 13:40:25 +02:00
validationJobRepository . save ( validationJob ) ;
2023-09-13 12:23:27 +02:00
return ResponseEntity . status ( HttpStatus . SC_UNPROCESSABLE_ENTITY ) . body ( validationJob ) ;
2023-05-29 13:40:25 +02:00
}
2023-07-25 14:59:12 +02:00
2023-09-13 12:23:27 +02:00
return ResponseEntity . ok ( validationJob ) ;
2023-05-29 13:40:25 +02:00
}
2023-10-02 13:08:47 +02:00
/ * @RequestMapping ( value = { " /getSets " } , method = RequestMethod . GET , produces = MediaType . APPLICATION_JSON_VALUE )
public void getSets ( @RequestParam ( name = " baseUrl " , defaultValue = " http://repositorium.sdum.uminho.pt/oai/request " ) String baseURL ) throws Exception {
String oaiListEndpoint = " oaipmh:// " + baseURL + " ?verb=ListSets " ;
OaiSetListRoute oaiSetListRoute = new OaiSetListRoute ( oaiListEndpoint , UUID . randomUUID ( ) . toString ( ) ) ;
camelContext . addRoutes ( oaiSetListRoute ) ;
} * /
2023-11-20 13:36:46 +01:00
@RequestMapping ( value = { " /getSets " } , method = RequestMethod . GET , produces = MediaType . APPLICATION_JSON_VALUE )
public ResponseEntity < String > getSets ( @RequestParam ( name = " baseUrl " , defaultValue = " http://repositorium.sdum.uminho.pt/oai/request " ) String baseURL //not in use now
2023-06-20 14:42:16 +02:00
) throws Exception {
2023-06-27 09:58:55 +02:00
ProducerTemplate producerTemplate = camelContext . createProducerTemplate ( ) ;
2023-11-20 13:36:46 +01:00
/*String response = producerTemplate.requestBodyAndHeader("direct:getResponse", null, "endpoint", "oaipmh://"+ baseURL + "?verb=ListSets", String.class);*/
Exchange message = producerTemplate . request ( " direct:getResponse " , exchange - > {
exchange . getMessage ( ) . setHeader ( " endpoint " , " oaipmh:// " + baseURL + " ?verb=ListSets " ) ;
} ) ;
return ResponseEntity
//TODO: maybe something better if header is not set (null and mot 200)
. status ( message . getIn ( ) . getHeader ( Exchange . HTTP_RESPONSE_CODE , 200 , Integer . class ) )
. body ( message . getIn ( ) . getBody ( String . class ) ) ;
2023-06-27 09:58:55 +02:00
2023-07-25 14:59:12 +02:00
/ * git git
2023-06-27 09:58:55 +02:00
System . out . println ( camelContext . getRoutes ( ) ) ;
2023-06-20 14:42:16 +02:00
RouteBuilder listSetsBuilder = new OaiSetListRoute ( " oaipmh:// " + baseURL + " ?verb=ListSets " ) ;
2023-06-27 09:58:55 +02:00
if ( camelContext . getRoutes ( ) . size ( ) < 1 )
2023-06-20 14:42:16 +02:00
camelContext . addRoutes ( listSetsBuilder ) ;
2023-06-27 09:58:55 +02:00
System . out . println ( camelContext . getRoutes ( ) ) ;
/ * camelContext . getRouteController ( ) . startRoute ( " oaiSetListRoute " ) ;
camelContext . getRouteController ( ) . stopRoute ( " oaiSetListRoute " ) ; * /
/ * ProducerTemplate template = camelContext . createProducerTemplate ( ) ;
String response = template . requestBody ( " direct:getResponse " , null , String . class ) ;
return response ;
* /
//System.out.println(((OaiSetListRoute) listSetsBuilder).xml);
2023-06-20 14:42:16 +02:00
}
/**
 * Placeholder endpoint that answers with a fixed, hard-coded JSON string.
 *
 * @return the hard-coded example payload
 */
@RequestMapping(value = {" /demo "}, produces = MediaType.APPLICATION_JSON_VALUE, method = RequestMethod.POST)
public String demo() {
    // Static example response; no sets are actually looked up.
    return " { \" sets \" : [ \" Set1 \" , \" Set2 \" , \" Set3 \" ] } ";
}
2023-05-03 14:00:57 +02:00
/**
 * Fills the given {@link ValidationRuleResult} from one rule entry of a validation run.
 *
 * @param validationJob        job whose id is copied onto the result
 * @param validationRuleResult result object that is populated in place
 * @param entry                rule entry; the key's string form is the rule name and the value
 *                             is cast to {@link Guideline.Result}
 * @param profile              profile used to look up the rule's weight
 * @param document             record document the record URL is extracted from
 */
private void constructValidationJobResult(ValidationJob validationJob, ValidationRuleResult validationRuleResult,
        Map.Entry entry, AbstractOpenAireProfile profile, Document document) {
    validationRuleResult.validationJobId = validationJob.id;

    final String nameOfRule = entry.getKey().toString();
    validationRuleResult.ruleName = nameOfRule;
    validationRuleResult.ruleWeight = profile.guideline(nameOfRule).getWeight();
    validationRuleResult.recordUrl = extractRecordUrl(document, " dc:identifier ");

    final Guideline.Result outcome = (Guideline.Result) entry.getValue();
    validationRuleResult.score = outcome.score();
    validationRuleResult.status = outcome.status().toString();
    validationRuleResult.internalError = outcome.internalError();
}
/**
 * Creates a new {@link ValidationRuleResult} and populates it from one rule entry.
 *
 * @param validationJob job whose id is copied onto the result
 * @param entry         rule entry (rule name as key, {@link Guideline.Result} as value)
 * @param profile       profile used to look up the rule's weight
 * @param document      record document the record URL is extracted from
 * @return the populated result
 */
private ValidationRuleResult constructValidationJobResult(ValidationJob validationJob,
        Map.Entry entry, AbstractOpenAireProfile profile, Document document) {
    ValidationRuleResult populated = new ValidationRuleResult();
    // The five-argument overload performs the same field assignments in the same order.
    constructValidationJobResult(validationJob, populated, entry, profile, document);
    return populated;
}
2023-07-14 13:21:21 +02:00
/ * private void saveValidationIssues ( int validationJobId , String recordUrl , String ruleName , Guideline . Result engineResult ) {
2023-05-03 14:00:57 +02:00
for ( String error : engineResult . errors ( ) ) {
2023-07-14 13:21:21 +02:00
* //*System.out.println("11111");*//*
2023-05-03 14:00:57 +02:00
ValidationIssue validationIssue = new ValidationIssue ( ) ;
validationIssue . validationJobId = validationJobId ;
validationIssue . ruleName = ruleName ;
validationIssue . recordUrl = recordUrl ;
validationIssue . issueType = " ERROR " ;
validationIssue . issueText = error ;
validationIssueRepository . save ( validationIssue ) ;
2023-07-14 13:21:21 +02:00
* //*
2023-05-03 14:00:57 +02:00
System . out . println ( validationIssue ) ;
2023-07-14 13:21:21 +02:00
* //*
2023-05-03 14:00:57 +02:00
}
for ( String warning : engineResult . warnings ( ) ) {
2023-07-14 13:21:21 +02:00
* //*
2023-05-03 14:00:57 +02:00
System . out . println ( " 22222 " ) ;
2023-07-14 13:21:21 +02:00
* //*
2023-05-03 14:00:57 +02:00
ValidationIssue validationIssue = new ValidationIssue ( ) ;
validationIssue . validationJobId = validationJobId ;
validationIssue . ruleName = ruleName ;
validationIssue . recordUrl = recordUrl ;
validationIssue . issueType = " WARNING " ;
validationIssue . issueText = warning ;
validationIssueRepository . save ( validationIssue ) ;
2023-07-14 13:21:21 +02:00
* //*
2023-05-03 14:00:57 +02:00
System . out . println ( validationIssue ) ;
2023-07-14 13:21:21 +02:00
* //*
2023-05-03 14:00:57 +02:00
}
2023-07-14 13:21:21 +02:00
} * /
2023-05-03 14:00:57 +02:00
/**
 * Builds the {@link ValidationRuleResult} for one rule evaluated on one record.
 *
 * @param validationJobId id of the owning validation job
 * @param recordUrl       identifier of the validated record
 * @param ruleName        name of the evaluated rule; also used to look up its weight
 * @param profile         profile that supplies the rule's weight
 * @param engineResult    engine outcome providing score, status and internal error
 * @return the populated result
 */
private ValidationRuleResult constructValidationRuleResult(int validationJobId, String recordUrl, String ruleName,
        AbstractOpenAireProfile profile, Guideline.Result engineResult) {
    final ValidationRuleResult ruleResult = new ValidationRuleResult();

    // Identification of the job, rule and record.
    ruleResult.validationJobId = validationJobId;
    ruleResult.ruleName = ruleName;
    ruleResult.ruleWeight = profile.guideline(ruleName).getWeight();
    ruleResult.recordUrl = recordUrl;

    // Outcome reported by the validation engine.
    ruleResult.score = engineResult.score();
    ruleResult.status = engineResult.status().toString();
    ruleResult.internalError = engineResult.internalError();

    return ruleResult;
}
/**
 * Returns the text content of the first element named {@code xmlField} in the document,
 * or a placeholder string when no such element exists.
 *
 * @param doc      document to search
 * @param xmlField tag name to look up
 * @return the first matching element's text content, or the placeholder
 */
//TODO consider throwing exception
private String extractRecordUrl(Document doc, String xmlField) {
    NodeList matches = doc.getElementsByTagName(xmlField);
    if (matches.getLength() < 1) {
        return " - ";
    }
    Element first = (Element) matches.item(0);
    return first.getTextContent();
}
private AbstractOpenAireProfile initializeOpenAireProfile ( String guidelinesProfileName ) {
2023-07-25 14:59:12 +02:00
System . out . println ( " GUIDELINES " + guidelinesProfileName ) ;
if ( guidelinesProfileName . equals ( " OpenAIRE Guidelines for Data Archives Profile v2 " ) ) {
2023-05-03 14:00:57 +02:00
return new DataArchiveGuidelinesV2Profile ( ) ;
2023-04-11 16:41:01 +02:00
2023-07-25 14:59:12 +02:00
} else if ( guidelinesProfileName . equals ( " OpenAIRE Guidelines for Literature Repositories Profile v3 " ) ) {
2023-05-03 14:00:57 +02:00
return new LiteratureGuidelinesV3Profile ( ) ;
2023-04-11 16:41:01 +02:00
2023-07-25 14:59:12 +02:00
} else if ( guidelinesProfileName . equals ( " OpenAIRE Guidelines for Literature Repositories Profile v4 " ) ) {
2023-05-03 14:00:57 +02:00
return new LiteratureGuidelinesV4Profile ( ) ;
2023-07-25 14:59:12 +02:00
} else if ( guidelinesProfileName . equals ( " OpenAIRE FAIR Guidelines for Data Repositories Profile " ) ) { //in case they give only fair. TODO: is ti possible?
return new FAIR_Data_GuidelinesProfile ( ) ;
2023-05-03 14:00:57 +02:00
}
return null ;
}
2023-07-10 13:11:36 +02:00
private String initializeMetadataPrefix ( String guidelinesProfileName ) {
2023-09-19 14:48:40 +02:00
if ( guidelinesProfileName . equals ( " OpenAIRE Guidelines for Data Archives Profile v2 " ) | |
guidelinesProfileName . equals ( " OpenAIRE FAIR Guidelines for Data Repositories Profile " ) ) {
2023-07-26 12:20:23 +02:00
return " oai_datacite " ;
2023-07-10 13:11:36 +02:00
2023-07-25 14:59:12 +02:00
} else if ( guidelinesProfileName . equals ( " OpenAIRE Guidelines for Literature Repositories Profile v3 " ) ) {
2023-07-10 13:11:36 +02:00
return " oai_dc " ;
2023-07-25 14:59:12 +02:00
} else if ( guidelinesProfileName . equals ( " OpenAIRE Guidelines for Literature Repositories Profile v4 " ) ) {
2023-07-10 13:11:36 +02:00
return " oai_openaire " ;
}
return null ;
}
2023-05-03 14:00:57 +02:00
private AbstractOpenAireProfile initializeFairProfile ( String guidelinesProfileName ) {
2023-10-19 12:17:12 +02:00
if ( guidelinesProfileName . equals ( " OpenAIRE Guidelines for Data Archives Profile v2 " ) ) {
2023-05-03 14:00:57 +02:00
return new FAIR_Data_GuidelinesProfile ( ) ;
2023-10-19 12:17:12 +02:00
} else if ( guidelinesProfileName . equals ( " OpenAIRE Guidelines for Literature Repositories Profile v4 " ) ) {
2023-11-21 14:19:09 +01:00
System . out . println ( " \ n \ n \ nRETURNING FAIR LITERATURE \ n \ n \ n " ) ;
System . out . println ( " \ n \ n \ n " + new FAIR_Literature_GuidelinesV4Profile ( ) . name ( ) + " \ n \ n \ n " ) ;
2023-10-19 12:17:12 +02:00
return new FAIR_Literature_GuidelinesV4Profile ( ) ;
2023-04-11 16:41:01 +02:00
}
2023-05-03 14:00:57 +02:00
return null ;
2023-04-25 10:18:14 +02:00
}
/**
 * Parses an XML response stream into a DOM {@link Document}.
 *
 * <p>The stream comes from a remote server, so resolution of external entities and external
 * DTDs is switched off before parsing to prevent XXE and server-side request forgery through
 * the parser. The stream is not closed here; that is left to the caller.
 *
 * @param stream XML input to parse
 * @return the parsed document
 * @throws Exception if the parser cannot be configured or the input cannot be parsed
 */
private Document parseResponse(InputStream stream)
        throws Exception
{
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    // Never fetch anything the document merely points at.
    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    factory.setXIncludeAware(false);

    return factory.newDocumentBuilder().parse(stream);
}
@RequestMapping ( value = { " /validateOAIPMH " } , method = RequestMethod . POST )
public void validateOAIPMH ( @RequestParam ( name = " guidelines " ) String guidelinesProfileName ,
@RequestParam ( name = " baseUrl " , defaultValue = " localhost " ) String baseUrl , //not in use now
@RequestParam ( name = " numberOfRecords " , defaultValue = " 10 " ) int numberOfRecords , //not in use now
@RequestBody String OAIPMHResponse ) {
2023-10-02 13:08:47 +02:00
ValidationJob validationJob = new ValidationJob ( baseUrl , null , numberOfRecords ) ;
2023-04-25 10:18:14 +02:00
List < RuleInfo > resultRules = null ;
List < RuleInfo > fairRules = null ;
2023-05-03 14:00:57 +02:00
AbstractOpenAireProfile profile = initializeOpenAireProfile ( guidelinesProfileName ) ;
AbstractOpenAireProfile fairProfile = initializeFairProfile ( guidelinesProfileName ) ;
2023-04-11 16:41:01 +02:00
validationJob . guidelines = profile . name ( ) ;
validationJobRepository . save ( validationJob ) ;
2023-06-12 14:48:10 +02:00
log . info ( " Initial validation job id " + validationJob . id ) ;
2023-04-11 16:41:01 +02:00
int record = 0 ;
double resultSum = 0 ;
try {
List < String > recordXmls = extractRecordXmls ( OAIPMHResponse ) ;
2023-05-03 14:00:57 +02:00
List < ValidationRuleResult > validationRuleResults = new ArrayList < > ( ) ;
2023-04-11 16:41:01 +02:00
DocumentBuilderFactory dbf = DocumentBuilderFactory . newInstance ( ) ;
DocumentBuilder db = dbf . newDocumentBuilder ( ) ;
for ( String recordXml : recordXmls ) {
Document doc = db . parse ( new InputSource ( new StringReader ( recordXml ) ) ) ;
if ( profile ! = null ) {
resultRules = new ArrayList < > ( ) ;
//what id is that?
XMLApplicationProfile . ValidationResult validationResult = profile . validate ( " id " , doc ) ;
Map < String , Guideline . Result > results = validationResult . results ( ) ;
for ( Map . Entry entry : results . entrySet ( ) ) {
2023-05-03 14:00:57 +02:00
ValidationRuleResult validationRuleResult = new ValidationRuleResult ( ) ;
validationRuleResult . validationJobId = validationJob . id ;
validationRuleResult . ruleName = entry . getKey ( ) . toString ( ) ;
validationRuleResult . recordUrl = " localhost://records/record[ " + record + " ] " ; // silly id
2023-04-11 16:41:01 +02:00
Guideline . Result engineResult = ( Guideline . Result ) entry . getValue ( ) ;
2023-05-03 14:00:57 +02:00
validationRuleResult . score = engineResult . score ( ) ;
validationRuleResult . status = engineResult . status ( ) . toString ( ) ;
validationRuleResult . internalError = engineResult . internalError ( ) ;
System . out . println ( validationRuleResult + " | " + validationRuleResult . hashCode ( ) + " \ n " ) ;
validationResultRepository . save ( validationRuleResult ) ;
2023-04-11 16:41:01 +02:00
2023-05-03 14:00:57 +02:00
validationRuleResults . add ( validationRuleResult ) ;
resultSum + = engineResult . score ( ) ;
2023-04-11 16:41:01 +02:00
}
}
record + + ;
}
2023-06-13 14:57:39 +02:00
validationJob . progress = " COMPLETED " ;
2023-04-11 16:41:01 +02:00
}
catch ( Exception e ) {
2023-09-13 12:23:27 +02:00
log . error ( " Validation job stopped unexpectedly. " + e . getMessage ( ) ) ;
2023-06-13 14:57:39 +02:00
validationJob . progress = " STOPPED " ;
2023-09-13 12:23:27 +02:00
validationJob . exceptionClass = e . getClass ( ) . getCanonicalName ( ) ;
validationJob . exceptionMessage = e . getMessage ( ) ;
2023-04-11 16:41:01 +02:00
} finally {
validationJob . endDate = new Date ( ) ;
2023-06-12 14:48:10 +02:00
log . info ( " Final validation job " + validationJob . id ) ;
2023-04-11 16:41:01 +02:00
validationJob . recordsTested = record ;
validationJob . score = resultSum / record ;
validationJobRepository . save ( validationJob ) ;
}
}
2023-05-03 14:00:57 +02:00
2023-04-11 16:41:01 +02:00
public List < String > extractRecordXmls ( String xml ) throws Exception {
DocumentBuilderFactory dbFactory = DocumentBuilderFactory . newInstance ( ) ;
DocumentBuilder dBuilder = dbFactory . newDocumentBuilder ( ) ;
Document doc = dBuilder . parse ( new InputSource ( new StringReader ( xml ) ) ) ;
XPathFactory xfactory = XPathFactory . newInstance ( ) ;
XPath xpath = xfactory . newXPath ( ) ;
XPathExpression recordsExpression = xpath . compile ( " //record " ) ;
NodeList recordNodes = ( NodeList ) recordsExpression . evaluate ( doc , XPathConstants . NODESET ) ;
List < String > records = new ArrayList < String > ( ) ;
for ( int i = 0 ; i < recordNodes . getLength ( ) ; + + i ) {
Node element = recordNodes . item ( i ) ;
StringWriter stringWriter = new StringWriter ( ) ;
Transformer xform = TransformerFactory . newInstance ( ) . newTransformer ( ) ;
xform . setOutputProperty ( OutputKeys . OMIT_XML_DECLARATION , " yes " ) ; // optional
xform . setOutputProperty ( OutputKeys . INDENT , " yes " ) ; // optional
xform . transform ( new DOMSource ( element ) , new StreamResult ( stringWriter ) ) ;
records . add ( stringWriter . toString ( ) ) ;
}
return records ;
}
2023-07-24 15:34:40 +02:00
/**
 * Experimental endpoint: builds a ListRecords endpoint string for the given guidelines and
 * sends it as the message body to {@code direct:startProcessing}.
 *
 * @param guidelinesProfileName name of the guidelines profile; selects the metadata prefix
 * @param baseURL               base URL of the OAI-PMH endpoint embedded in the endpoint string
 */
@RequestMapping(value = {" /test "}, method = RequestMethod.GET)
public void test(@RequestParam(name = " guidelines ") String guidelinesProfileName,
        @RequestParam(name = " baseUrl ", defaultValue = " http://repositorium.sdum.uminho.pt/oai/request ") String baseURL
) {
    // Both profiles are still initialized as before; their results are not used here.
    initializeOpenAireProfile(guidelinesProfileName);
    initializeFairProfile(guidelinesProfileName);

    final String prefix = initializeMetadataPrefix(guidelinesProfileName);
    final String target = " oaipmh:// " + baseURL + " ?verb=ListRecords&metadataPrefix= " + prefix;

    ProducerTemplate template = camelContext.createProducerTemplate();
    template.sendBody(" direct:startProcessing ", target);
}
2023-10-19 12:17:12 +02:00
}