package eu.dnetlib.validator2.validation.guideline.openaire;

//import com.google.gson.Gson;
import eu.dnetlib.validator2.engine.Status;
import eu.dnetlib.validator2.validation.XMLApplicationProfile;
import eu.dnetlib.validator2.validation.guideline.*;
import eu.dnetlib.validator2.validation.utils.ResultUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;

import java.util.*;
import java.util.stream.Collectors;

import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE;
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE_TO_N;

/**
 * Application profile that bundles the guidelines listed in {@link #GUIDELINES} and exposes them
 * by collection, by name and as a maximum achievable score (the sum of the guideline weights).
 */
public final class FAIR_Literature_GuidelinesV4Profile extends AbstractOpenAireProfile {

    // ---------------------------------------------------------------------------------------------
    // Controlled vocabularies. Several of them are not referenced by any element spec in this file.
    // ---------------------------------------------------------------------------------------------

    private static final String[] TITLE_TYPES = {
            "AlternativeTitle", "Subtitle", "TranslatedTitle", "Other"
    };

    private static final String[] NAME_TYPES = {
            "Organizational", "Personal"
    };

    private static final String[] CONTRIBUTOR_TYPES = {
            "ContactPerson", "DataCollector", "DataCurator", "DataManager", "Distributor",
            "Editor", "HostingInstitution", "Producer", "ProjectLeader", "ProjectManager",
            "ProjectMember", "RegistrationAgency", "RegistrationAuthority", "RelatedPerson",
            "Researcher", "ResearchGroup", "RightsHolder", "Sponsor", "Supervisor",
            "WorkPackageLeader", "Other"
    };

    private static final String[] FUNDER_IDENTIFIER_TYPES = {
            "ISNI", "GRID", "Crossref Funder"
    };

    private static final String[] IDENTIFIER_TYPES = {
            "ARK", "arXiv", "bibcode", "DOI", "EAN13", "EISSN", "Handle", "IGSN", "ISBN",
            "ISSN", "ISTC", "LISSN", "LSID", "PISSN", "PMID", "PURL", "UPC", "URL", "URN",
            "WOS",
    };

    private static final String[] RELATION_TYPES = {
            "IsCitedBy", "Cites", "IsSupplementTo", "IsSupplementedBy", "IsContinuedBy",
            "Continues", "IsDescribedBy", "Describes", "HasMetadata", "IsMetadataFor",
            "HasVersion", "IsVersionOf", "IsNewVersionOf", "IsPreviousVersionOf", "IsPartOf",
            "HasPart", "IsReferencedBy", "References", "IsDocumentedBy", "Documents",
            "IsCompiledBy", "Compiles", "IsVariantFormOf", "IsOriginalFormOf", "IsIdenticalTo",
            "IsReviewedBy", "Reviews", "IsDerivedFrom", "IsSourceOf", "IsRequiredBy", "Requires"
    };

    private static final String[] RELATED_RESOURCE_GENERAL_TYPES = {
            "Audiovisual", "Collection", "DataPaper", "Dataset", "Event", "Image",
            "InteractiveResource", "Model", "PhysicalObject", "Service", "Software", "Sound",
            "Text", "Workflow", "Other"
    };

    private static final String[] EMBARGO_DATE_TYPES = {
            "Accepted", "Available"
    };

    private static final String[] PUBLICATION_DATE_TYPE = {
            "Issued"
    };

    private static final String[] RESOURCE_GENERAL_TYPES = {
            "literature", "dataset", "software", "other research product"
    };

    private static final String[] RESOURCE_CONCEPT_URIS = {
            "http://purl.org/coar/resource_type/c_1162",
            "http://purl.org/coar/resource_type/c_6501",
            "http://purl.org/coar/resource_type/c_545b",
            "http://purl.org/coar/resource_type/c_b239",
            "http://purl.org/coar/resource_type/c_2df8fbb1",
            "http://purl.org/coar/resource_type/c_dcae04bc",
            "http://purl.org/coar/resource_type/c_beb9",
            "http://purl.org/coar/resource_type/c_3e5a",
            "http://purl.org/coar/resource_type/c_ba08",
            "http://purl.org/coar/resource_type/c_3248",
            "http://purl.org/coar/resource_type/c_2f33",
            "http://purl.org/coar/resource_type/c_86bc",
            "http://purl.org/coar/resource_type/c_816b",
            "http://purl.org/coar/resource_type/c_8042",
            "http://purl.org/coar/resource_type/c_71bd",
            "http://purl.org/coar/resource_type/c_18gh",
            "http://purl.org/coar/resource_type/c_18ws",
            "http://purl.org/coar/resource_type/c_18hj",
            "http://purl.org/coar/resource_type/c_18op",
            "http://purl.org/coar/resource_type/c_186u",
            "http://purl.org/coar/resource_type/c_18wq",
            "http://purl.org/coar/resource_type/c_18wz",
            "http://purl.org/coar/resource_type/c_18ww",
            "http://purl.org/coar/resource_type/c_efa0",
            "http://purl.org/coar/resource_type/c_baaf",
            "http://purl.org/coar/resource_type/c_ba1f",
            "http://purl.org/coar/resource_type/c_93fc",
            "http://purl.org/coar/resource_type/c_15cd",
            "http://purl.org/coar/resource_type/c_18co",
            "http://purl.org/coar/resource_type/c_18cp",
            "http://purl.org/coar/resource_type/c_6670",
            "http://purl.org/coar/resource_type/c_5794",
            "http://purl.org/coar/resource_type/c_c94f",
            "http://purl.org/coar/resource_type/c_f744",
            "http://purl.org/coar/resource_type/c_7a1f",
            "http://purl.org/coar/resource_type/c_bdcc",
            "http://purl.org/coar/resource_type/c_db06",
            "http://purl.org/coar/resource_type/c_46ec",
            "http://purl.org/coar/resource_type/c_0857",
            "http://purl.org/coar/resource_type/c_8544",
            "http://purl.org/coar/resource_type/c_18cf",
            "http://purl.org/coar/resource_type/c_18cw",
            "http://purl.org/coar/resource_type/c_18cd",
            "http://purl.org/coar/resource_type/c_18cc",
            "http://purl.org/coar/resource_type/c_12ce",
            "http://purl.org/coar/resource_type/c_8a7e",
            "http://purl.org/coar/resource_type/c_ecc8",
            "http://purl.org/coar/resource_type/c_c513",
            "http://purl.org/coar/resource_type/c_12cd",
            "http://purl.org/coar/resource_type/c_12cc",
            "http://purl.org/coar/resource_type/c_5ce6",
            "http://purl.org/coar/resource_type/c_ddb1",
            "http://purl.org/coar/resource_type/c_e9a0",
            "http://purl.org/coar/resource_type/c_7ad9",
            "http://purl.org/coar/resource_type/c_393c",
            "http://purl.org/coar/resource_type/c_1843"
    };

    private static final String[] RESOURCE_IDENTIFIER_TYPES = {
            "ARK", "DOI", "Handle", "PURL", "URL", "URN"
    };

    private static final String[] ACCESS_RIGHTS_TYPES = {
            "open access", "embargoed access", "restricted access", "metadata only access"
    };

    private static final String[] ACCESS_RIGHTS_URIS = {
            "http://purl.org/coar/access_right/c_abf2",
            "http://purl.org/coar/access_right/c_f1cf",
            "http://purl.org/coar/access_right/c_16ec",
            "http://purl.org/coar/access_right/c_14cb"
    };

    private static final String[] RESOURCE_VERSION_URIS = {
            "http://purl.org/coar/version/c_b1a7d7d4d402bcce",
            "http://purl.org/coar/version/c_71e4c1898caa6e32",
            "http://purl.org/coar/version/c_ab4af688f83e57aa",
            "http://purl.org/coar/version/c_fa2ee174bc00049f",
            "http://purl.org/coar/version/c_970fb48d4fbd8a85",
            "http://purl.org/coar/version/c_e19f295774971610",
            "http://purl.org/coar/version/c_dc82b40f9837b551",
            "http://purl.org/coar/version/c_be7fb7dd8ff6fe43"
    };

    private static final String[] RESOURCE_VERSION_LABELS = {
            "AO", "SMUR", "AM", "P", "VoR", "CVoR", "EVoR", "NA"
    };

    private static final String[] FILE_OBJECT_TYPES = {
            "fulltext", "dataset", "software", "other"
    };

    private static final String[] AUDIENCE_VOCABULARY = {
            "Administrators", "Community Groups", "Counsellors",
            "Federal Funds Recipients and Applicants", "Librarians", "News Media", "Other",
            "Parents and Families", "Policymakers", "Researchers", "School Support Staff",
            "Student Financial Aid Providers", "Students", "Teachers"
    };

    // ---------------------------------------------------------------------------------------------
    // Element specifications
    // ---------------------------------------------------------------------------------------------

    // persistent identifier
    private static final ElementSpec F1_01D_SPEC = Builders.
            forMandatoryElement("datacite:identifier", ONE).
            withMandatoryAttribute("identifierType", RESOURCE_IDENTIFIER_TYPES).
            build();

    // persistent identifier
    private static final ElementSpec F1_02D_SPEC = Builders.
            forMandatoryElement("identifier", ONE).
            withMandatoryAttribute("identifierType", IDENTIFIER_TYPES).
            build();

//    // To be implemented: In the case of OpenAIRE we suppose it succeeds
//    private static final ElementSpec F4_01M_SPEC = Builders.
//            forMandatoryElement("identifier", ONE).
//            withMandatoryAttribute("identifierType", RESOURCE_IDENTIFIER_TYPES).
//            build();

    private static final ElementSpec A1_01M_SPEC = Builders.
            forMandatoryElement("datacite:rights", ONE_TO_N).
            withMandatoryAttribute("rightsURI", ACCESS_RIGHTS_URIS).
            build();

//    // To be implemented
//    private static final ElementSpec A2_01M_SPEC = Builders.
//            forMandatoryElement("subject", ONE_TO_N).
//            withMandatoryAttribute("subjectScheme").
//            withMandatoryAttribute("schemeURI").
//            build();

//    // To be implemented
//    private static final ElementSpec I1_01M_SPEC = Builders.
//            forMandatoryElement("subject", ONE_TO_N).
//            build();

//    // To be implemented
//    // I1_02M Metadata uses semantic resources
//    private static final ElementSpec I1_02M_SPEC = Builders.
//            forMandatoryElement("subject", ONE).
//            build();

    private static final ElementSpec I3_01M_SPEC_1 = Builders.
            forRecommendedRepeatableElement("datacite:relatedIdentifier").
            withMandatoryAttribute("relatedIdentifierType", IDENTIFIER_TYPES).
            withMandatoryAttribute("relationType", RELATION_TYPES).
            //TODO: For following 3 attributes. Need a way to target relationType attribute of current element
            // - Should be used only with relation type (HasMetadata/IsMetadataFor).
            withOptionalAttribute("relatedMetadataScheme").
            withOptionalAttribute("schemeURI").
            withOptionalAttribute("schemeType").
            withOptionalAttribute("resourceTypeGeneral", RELATED_RESOURCE_GENERAL_TYPES).
            build();

//    private static final ElementSpec I3_01M_SPEC_2 = Builders.
//            forMandatoryElement("creator", ONE_TO_N).
//            withMandatoryAttribute("creatorName").
//            withMandatoryAttribute("nameIdentifier", NAME_IDENTIFIER_SCHEMES).
//            withMandatoryAttribute("nameIdentifierScheme", NAME_IDENTIFIER_SCHEMES).
//            withMandatoryAttribute("schemeURI", NAME_IDENTIFIER_SCHEMES_URIS).
//            withMandatoryAttribute("affiliation", NAME_IDENTIFIER_SCHEMES).
//            build();

    // TO BE IMPLEMENTED
    private static final ElementSpec R1_1_01M_SPEC = Builders.
            forRecommendedElement("oaire:licenseCondition").
            withMandatoryIfApplicableAttribute("uri", elementIsPresent("oaire:licenseCondition")).
            withMandatoryIfApplicableAttribute("startDate", elementIsPresent("oaire:licenseCondition")).
            build();

    // ---------------------------------------------------------------------------------------------
    // Guidelines
    // ---------------------------------------------------------------------------------------------

    //TODO: weights for guidelines haven't been finalized. They've been given an arbitrary value of 10.
    public static SyntheticGuideline F1_01D = SyntheticGuideline.of("Data is identified by a persistent identifier", 10, F1_01D_SPEC);
    public static SyntheticGuideline F1_02D = SyntheticGuideline.of("Data is identified by a globally unique identifier", 10, F1_02D_SPEC);
//    public static SyntheticGuideline F4_01M = SyntheticGuideline.of("F4_01M", 10, F4_01M_SPEC);
    public static SyntheticGuideline A1_01M = SyntheticGuideline.of("Metadata contains information to enable the user to get access to the data", 10, A1_01M_SPEC);
//    public static SyntheticGuideline A2_01M = SyntheticGuideline.of("A2_01M", 10, A2_01M_SPEC);
//    public static SyntheticGuideline I1_01M = SyntheticGuideline.of("I1_01M", 10, I1_01M_SPEC);
//    public static SyntheticGuideline I1_02M = SyntheticGuideline.of("I1_02M", 10, I1_02M_SPEC);
    public static SyntheticGuideline I3_01M_1 = SyntheticGuideline.of("Metadata includes references to other metadata", 10, I3_01M_SPEC_1);
//    public static SyntheticGuideline I3_01M_2 = SyntheticGuideline.of("I3_01M_2", 5, I3_01M_SPEC_2);
//    public static SyntheticGuideline R1_01M = SyntheticGuideline.of("R1_01M", 10, R1_01M_SPEC);
    public static SyntheticGuideline R1_1_01M = SyntheticGuideline.of("Metadata includes information about the licence under which the data can be reused", 10, R1_1_01M_SPEC);

    // NOTE(review): the composite guidelines instantiated below are F2_01M_SPEC, F3_01M_SPEC,
    // I2_01M_SPEC, R1_01M_SPEC and R1_2_01M_SPEC (declared outside this file), not the *_LIT
    // classes declared at the bottom of this file. Confirm which set this profile is meant to use.
    private static final List<Guideline<Document>> GUIDELINES = Collections.unmodifiableList(
            Arrays.asList(
                    F1_01D,
                    F1_02D,
                    new F2_01M_SPEC(),
                    new F3_01M_SPEC(),
//                    F4_01M,
                    A1_01M,
//                    A2_01M,
//                    I1_01M,
//                    I1_02M,
                    new I2_01M_SPEC(),
                    I3_01M_1,
//                    I3_01M_2,
//                    R1_01M,
                    new R1_01M_SPEC(),
                    R1_1_01M,
                    new R1_2_01M_SPEC()
//                    new MetadataCompleteness()
//                    new I3_01M()
            )
    );

    /** Guidelines indexed by {@code getName()}. */
    private static final Map<String, Guideline<Document>> GUIDELINE_MAP = GUIDELINES.stream().
            collect(Collectors.toMap(Guideline::getName, (guideline) -> guideline));

    /** Sum of the weights of every guideline in {@link #GUIDELINES}. */
    private static final int MAX_SCORE = GUIDELINES.stream().mapToInt(Guideline::getWeight).sum();

    public FAIR_Literature_GuidelinesV4Profile() {
        // NOTE(review): the name contains a duplicated "for". It is left untouched because the
        // string is visible at runtime and may be matched elsewhere; confirm before correcting.
        super("OpenAIRE FAIR Guidelines for for Literature Repositories Profile v4");
    }

    /**
     * Returns the guidelines of this profile.
     *
     * @return the unmodifiable guideline list, in declaration order
     */
    @Override
    public Collection<? extends Guideline<Document>> guidelines() {
        return GUIDELINES;
    }

    /**
     * Looks a guideline up by its name.
     *
     * @param guidelineName the value returned by the guideline's {@code getName()}
     * @return the matching guideline, or {@code null} when no guideline has that name
     */
    @Override
    public Guideline guideline(String guidelineName) {
        return GUIDELINE_MAP.get(guidelineName);
    }

    /**
     * Returns the highest score a record can obtain under this profile.
     *
     * @return the sum of all guideline weights
     */
    @Override
    public int maxScore() {
        return MAX_SCORE;
    }
}

//// TODO this goes to FAIRProfile
//class MetadataCompleteness extends AbstractGuideline<Document> {
//
//    public MetadataCompleteness() {
//        super("MetadataCompleteness", 40);
//    }
//
//    @Override
//    public Result validate(String id, Document t) {
//        DataArchiveGuidelinesV2Profile profile = new DataArchiveGuidelinesV2Profile();
//
//        // TODO: iterate over results and build one Guideline.Result
//        try {
////            System.out.println("Processing MetadataCompleteness...");
//            XMLApplicationProfile.ValidationResult res = profile.validate(id, t);
//            Map<String, Result> results = res.results();
//            int MaxScoreMetadataCompleteness = (int) ((res.score()*getWeight())/100);
//
////            System.out.println("Max score DataValidator(%): " + res.score());
////            System.out.println("Weight FAIRG: " + getWeight());
////            System.out.println("Max score MetadataCompleteness: " + MaxScoreMetadataCompleteness);
////            System.out.println("\n\n\n\n");
//
////            for (Map.Entry<String, Result> entry : results.entrySet()) {
////                System.out.println(entry.getKey() + " = " + entry.getValue());
////            }
////            System.out.println(score);
//            return getResult(MaxScoreMetadataCompleteness);
//
////            System.out.println(tempp.status() + " - " + tempp.score());
////            String printout = results.entrySet().stream().
////                    map(entry -> entry.getValue() + ": " + entry.getKey()).collect(Collectors.joining("\n"));
////            System.out.println(printout);
//
////            System.out.println("\n\n\n\n");
//        } catch (Exception e) {
//            System.out.println(e.getMessage());
//            System.out.println(e);
//            e.printStackTrace();
//        }
//
//        return null;
//    }
//
//    private static Result getResult(int score) {
//        String aa;
//        aa = (score > 0) ? "SUCCESS" : "FAILURE";
//        return new Result() {
//            @Override
//            public int score() {
//                return score;
//            }
//
//            @Override
//            public Status status() {
////                return null;
//                return Status.valueOf(aa);
//            }
//
//            @Override
//            public Iterable<String> warnings() { return null; }
//
//            @Override
//            public Iterable<String> errors() {
//                return null;
//            }
//
//            @Override
//            public String internalError() {
//                return null;
//            }
//        };
//    }
//}

/**
 * Composite guideline that runs the {@link F2_01M_LIT} profile against a record and folds the
 * per-rule results into a single result: 2 points for every rule whose result reports SUCCESS.
 */
class F2_01M_SPEC_LIT extends AbstractGuideline<Document> {

    // Fixed: the logger was previously created for F2_01M_SPEC.class (copy-paste), which
    // attributed this class's log output to a different class.
    private static final Logger logger = LoggerFactory.getLogger(F2_01M_SPEC_LIT.class);

    public F2_01M_SPEC_LIT() {
        super("Rich metadata is provided to allow discovery", 2*6);
    }

    /**
     * Validates the document with {@link F2_01M_LIT} and aggregates warnings, errors and score.
     *
     * @param id identifier passed through to the nested profile
     * @param t  the document to validate
     * @return the aggregated result, or {@code null} when the nested validation throws
     */
    @Override
    public Result validate(String id, Document t) {
        F2_01M_LIT profile = new F2_01M_LIT();

        try {
            XMLApplicationProfile.ValidationResult res_F = profile.validate(id, t);
            Map<String, Result> results = res_F.results();

            // Get actual score and not (%) to incorporate to FAIR score. Only logged; the score
            // that is returned is the per-success tally computed in the loop below.
            final int MaxScoreF2_01M_LIT_SPEC = (int) ((res_F.score()*getWeight())/100);

            logger.debug("Max score DataValidator(%): {}", res_F.score());
            logger.debug("Weight FAIRG: {}", getWeight());
            logger.debug("Max score F2_01M_SPEC: {}", MaxScoreF2_01M_LIT_SPEC);

            List<String> warnings2 = new ArrayList<>();
            List<String> errors2 = new ArrayList<>();
            int score = 0;

            for (Map.Entry<String, Result> entry : results.entrySet()) {
                final Result ruleResult = entry.getValue();
                String warningsStr = ruleResult.warnings().toString();
                String errorsStr = ruleResult.errors().toString();

                // A text longer than two characters is treated as "has messages"; presumably
                // an empty collection renders as "[]" - confirm against the Result implementation.
                if ( warningsStr.length() > 2 ) {
                    warnings2.add(warningsStr);
                }
                if ( errorsStr.length() > 2 ) {
                    errors2.add(errorsStr);
                }
                if (ruleResult.toString().contains("SUCCESS")) {
                    score += 2;
                }

                if ( logger.isTraceEnabled() ) {
                    logger.trace(String.valueOf(ruleResult.warnings().getClass()));
                    logger.trace(String.valueOf(warnings2.getClass()));
                }
            }

            final Result ress = getResult(warnings2, errors2, score);
            return new StandardResult(ress.score(), ress.status(), (List<String>) ress.warnings(), (List<String>) ress.errors(), ress.internalError());
        } catch (Exception e) {
            logger.error("Validation of guideline '{}' failed for record '{}'", getName(), id, e);
            return null;
        }
    }

    private static Result getResult(List<String> warnings2, List<String> errors2, int score2) {
        return ResultUtils.getNewResult(warnings2, errors2, score2);
    }
}

/**
 * Composite guideline that runs the {@link F3_01M_LIT} profile against a record and converts the
 * profile's percentage score into points relative to this guideline's weight.
 */
class F3_01M_SPEC_LIT extends AbstractGuideline<Document> {

    // Fixed: the logger was previously created for F3_01M_SPEC.class (copy-paste).
    private static final Logger logger = LoggerFactory.getLogger(F3_01M_SPEC_LIT.class);

    public F3_01M_SPEC_LIT() {
        super("Metadata includes the identifier for the data", 2*2);
    }

    /**
     * Validates the document with {@link F3_01M_LIT} and aggregates warnings, errors and score.
     *
     * @param id identifier passed through to the nested profile
     * @param t  the document to validate
     * @return the aggregated result, or {@code null} when the nested validation throws
     */
    @Override
    public Result validate(String id, Document t) {
        F3_01M_LIT profile = new F3_01M_LIT();

        try {
            XMLApplicationProfile.ValidationResult res_F = profile.validate(id, t);
            Map<String, Result> results = res_F.results();

            // A nested score whose integer part is exactly 50 is doubled before weighting.
            // NOTE(review): presumably "one of two rules passed" is meant to earn full marks -
            // confirm the intent.
            final boolean halfScore = (int) res_F.score() == 50;
            final int MaxScoreF3_01M_LIT_SPEC = halfScore
                    ? (int) (((2*res_F.score())*getWeight())/100)
                    : (int) ((res_F.score()*getWeight())/100);

            List<String> warnings2 = new ArrayList<>();
            List<String> errors2 = new ArrayList<>();

            for (Map.Entry<String, Result> entry : results.entrySet()) {
                final Result ruleResult = entry.getValue();
                String warningsStr = ruleResult.warnings().toString();
                String errorsStr = ruleResult.errors().toString();

                // A text longer than two characters is treated as "has messages"; presumably
                // an empty collection renders as "[]" - confirm against the Result implementation.
                if ( warningsStr.length() > 2 ) {
                    warnings2.add(warningsStr);
                }
                if ( errorsStr.length() > 2 ) {
                    errors2.add(errorsStr);
                }
            }

            final Result ress = getResult(warnings2, errors2, MaxScoreF3_01M_LIT_SPEC);
            return new StandardResult(ress.score(), ress.status(), (List<String>) ress.warnings(), (List<String>) ress.errors(), ress.internalError());
        } catch (Exception e) {
            logger.error("Validation of guideline '{}' failed for record '{}'", getName(), id, e);
            return null;
        }
    }

    private static Result getResult(List<String> warnings2, List<String> errors2, int score2) {
        return ResultUtils.getNewResult(warnings2, errors2, score2);
    }
}

/**
 * Composite guideline that runs the {@link I2_01M_LIT} profile against a record and converts the
 * profile's percentage score into points relative to this guideline's weight.
 */
class I2_01M_SPEC_LIT extends AbstractGuideline<Document> {

    private static final Logger logger = LoggerFactory.getLogger(I2_01M_SPEC_LIT.class);

    public I2_01M_SPEC_LIT() {
        super("Metadata uses FAIR-compliant vocabularies", 5*2);
    }

    /**
     * Validates the document with {@link I2_01M_LIT} and aggregates warnings, errors and score.
     *
     * @param id identifier passed through to the nested profile
     * @param t  the document to validate
     * @return the aggregated result, or {@code null} when the nested validation throws
     */
    @Override
    public Result validate(String id, Document t) {
        I2_01M_LIT profile = new I2_01M_LIT();

        try {
            logger.debug("Metadata uses FAIR-compliant vocabularies");
            XMLApplicationProfile.ValidationResult res_F = profile.validate(id, t);
            Map<String, Result> results = res_F.results();

            // A nested score whose integer part is exactly 50 is doubled before weighting.
            // NOTE(review): confirm that a 50% nested score is meant to earn full marks.
            final boolean halfScore = (int) res_F.score() == 50;
            final int MaxScoreI2_01M_LIT_SPEC = halfScore
                    ? (int) (((2*res_F.score())*getWeight())/100)
                    : (int) ((res_F.score()*getWeight())/100);

            List<String> warnings2 = new ArrayList<>();
            List<String> errors2 = new ArrayList<>();

            for (Map.Entry<String, Result> entry : results.entrySet()) {
                final Result ruleResult = entry.getValue();
                String warningsStr = ruleResult.warnings().toString();
                String errorsStr = ruleResult.errors().toString();

                // A text longer than two characters is treated as "has messages"; presumably
                // an empty collection renders as "[]" - confirm against the Result implementation.
                if ( warningsStr.length() > 2 ) {
                    warnings2.add(warningsStr);
                }
                if ( errorsStr.length() > 2 ) {
                    errors2.add(errorsStr);
                }
            }

            final Result ress = getResult(warnings2, errors2, MaxScoreI2_01M_LIT_SPEC);
            return new StandardResult(ress.score(), ress.status(), (List<String>) ress.warnings(), (List<String>) ress.errors(), ress.internalError());
        } catch (Exception e) {
            logger.error("Validation of guideline '{}' failed for record '{}'", getName(), id, e);
        }

        return null;
    }

    private static Result getResult(List<String> warnings2, List<String> errors2, int score2) {
        return ResultUtils.getNewResult(warnings2, errors2, score2);
    }
}

/**
 * Composite guideline that runs the {@link R1_01M_LIT} profile against a record and folds the
 * per-rule results into a single result: 3 points for every rule whose result reports SUCCESS.
 */
class R1_01M_SPEC_LIT extends AbstractGuideline<Document> {

    private static final Logger logger = LoggerFactory.getLogger(R1_01M_SPEC_LIT.class);

    public R1_01M_SPEC_LIT() {
        super("Plurality of accurate and relevant attributes are provided to allow reuse", 3*4);
    }

    /**
     * Validates the document with {@link R1_01M_LIT} and aggregates warnings, errors and score.
     *
     * @param id identifier passed through to the nested profile
     * @param t  the document to validate
     * @return the aggregated result, or {@code null} when the nested validation throws
     */
    @Override
    public Result validate(String id, Document t) {
        R1_01M_LIT profile = new R1_01M_LIT();

        try {
            XMLApplicationProfile.ValidationResult res_F = profile.validate(id, t);
            Map<String, Result> results = res_F.results();

            List<String> warnings2 = new ArrayList<>();
            List<String> errors2 = new ArrayList<>();
            int score = 0;

            for (Map.Entry<String, Result> entry : results.entrySet()) {
                final Result ruleResult = entry.getValue();
                String warningsStr = ruleResult.warnings().toString();
                String errorsStr = ruleResult.errors().toString();

                // A text longer than two characters is treated as "has messages"; presumably
                // an empty collection renders as "[]" - confirm against the Result implementation.
                if ( warningsStr.length() > 2 ) {
                    warnings2.add(warningsStr);
                }
                if ( errorsStr.length() > 2 ) {
                    errors2.add(errorsStr);
                }
                if (ruleResult.toString().contains("SUCCESS")) {
                    score += 3;
                }
            }

            final Result ress = getResult(warnings2, errors2, score);
            return new StandardResult(ress.score(), ress.status(), (List<String>) ress.warnings(), (List<String>) ress.errors(), ress.internalError());
        } catch (Exception e) {
            logger.error("Validation of guideline '{}' failed for record '{}'", getName(), id, e);
        }

        return null;
    }

    private static Result getResult(List<String> warnings2, List<String> errors2, int score2) {
        return ResultUtils.getNewResult(warnings2, errors2, score2);
    }
}

/**
 * Composite guideline that runs the {@link R1_2_01M_LIT} profile against a record and folds the
 * per-rule results into a single result: 3 points for every rule whose result reports SUCCESS.
 */
class R1_2_01M_SPEC_LIT extends AbstractGuideline<Document> {

    private static final Logger logger = LoggerFactory.getLogger(R1_2_01M_SPEC_LIT.class);

    public R1_2_01M_SPEC_LIT() {
        super("Metadata includes provenance information according to a cross-community language", 2*5);
    }

    /**
     * Validates the document with {@link R1_2_01M_LIT} and aggregates warnings, errors and score.
     *
     * @param id identifier passed through to the nested profile
     * @param t  the document to validate
     * @return the aggregated result, or {@code null} when the nested validation throws
     */
    @Override
    public Result validate(String id, Document t) {
        R1_2_01M_LIT profile = new R1_2_01M_LIT();

        try {
            XMLApplicationProfile.ValidationResult res_F = profile.validate(id, t);
            Map<String, Result> results = res_F.results();

            List<String> warnings2 = new ArrayList<>();
            List<String> errors2 = new ArrayList<>();
            int score = 0;

            for (Map.Entry<String, Result> entry : results.entrySet()) {
                final Result ruleResult = entry.getValue();
                String warningsStr = ruleResult.warnings().toString();
                String errorsStr = ruleResult.errors().toString();

                // A text longer than two characters is treated as "has messages"; presumably
                // an empty collection renders as "[]" - confirm against the Result implementation.
                if ( warningsStr.length() > 2 ) {
                    warnings2.add(warningsStr);
                }
                if ( errorsStr.length() > 2 ) {
                    errors2.add(errorsStr);
                }
                // NOTE(review): the weight is declared as 2*5 while each success adds 3 points;
                // the sibling guidelines pair 2*6 with +2 and 3*4 with +3. If R1_2_01M_LIT has
                // more than three rules the tally can exceed the weight - confirm whether this
                // increment should be 2.
                if (ruleResult.toString().contains("SUCCESS")) {
                    score += 3;
                }
            }

            final Result ress = getResult(warnings2, errors2, score);
            return new StandardResult(ress.score(), ress.status(), (List<String>) ress.warnings(), (List<String>) ress.errors(), ress.internalError());
        } catch (Exception e) {
            logger.error("Validation of guideline '{}' failed for record '{}'", getName(), id, e);
        }

        return null;
    }

    private static Result getResult(List<String> warnings2, List<String> errors2, int score2) {
        return ResultUtils.getNewResult(warnings2, errors2, score2);
    }
}