364 lines
17 KiB
Java
364 lines
17 KiB
Java
package eu.dnetlib.validator2.validation.guideline.openaire;
|
|
|
|
//import com.google.gson.Gson;
|
|
|
|
import eu.dnetlib.validator2.validation.guideline.*;
|
|
import org.w3c.dom.Document;
|
|
|
|
import java.util.*;
|
|
import java.util.stream.Collectors;
|
|
|
|
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE;
|
|
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE_TO_N;
|
|
|
|
public final class FAIR_Literature_GuidelinesProfile extends AbstractOpenAireProfile {
|
|
|
|
private static final String[] TITLE_TYPES = {
|
|
"AlternativeTitle", "Subtitle", "TranslatedTitle", "Other"
|
|
};
|
|
|
|
private static final String[] NAME_TYPES = {
|
|
"Organizational", "Personal"
|
|
};
|
|
|
|
private static final String[] CONTRIBUTOR_TYPES = {
|
|
"ContactPerson", "DataCollector", "DataCurator", "DataManager", "Distributor",
|
|
"Editor", "HostingInstitution", "Producer", "ProjectLeader", "ProjectManager", "ProjectMember",
|
|
"RegistrationAgency", "RegistrationAuthority", "RelatedPerson", "Researcher", "ResearchGroup",
|
|
"RightsHolder", "Sponsor", "Supervisor", "WorkPackageLeader", "Other"
|
|
};
|
|
|
|
private static final String[] FUNDER_IDENTIFIER_TYPES = {
|
|
"ISNI", "GRID", "Crossref Funder"
|
|
};
|
|
|
|
private static final String[] CONTRIBUTOR_NAME_IDENTIFIER_TYPES = {
|
|
"Funder", "FundingProgramme", "ProjectID", "Jurisdiction", "ProjectName", "ProjectAcronym"
|
|
};
|
|
|
|
private static final String[] rightsURIList = {
|
|
"info:eu-repo/semantics/closedAccess",
|
|
"info:eu-repo/semantics/embargoedAccess",
|
|
"info:eu-repo/semantics/restrictedAccess",
|
|
"info:eu-repo/semantics/openAccess"
|
|
};
|
|
|
|
private static final String[] IDENTIFIER_TYPES = {
|
|
"ARK", "arXiv", "bibcode", "DOI", "EAN13", "EISSN", "Handle", "IGSN", "ISBN",
|
|
"ISSN", "ISTC", "LISSN", "LSID", "PISSN", "PMID", "PURL", "UPC", "URL", "URN", "WOS",
|
|
};
|
|
|
|
private static final String[] PERSISTENT_IDENTIFIER_TYPES = {
|
|
"IGSN", "QID", "ARK", "IVOA", "CAS RN", "RRID", "URL", "RInChi", "ERM", "ISO 27729:2012 ISNI",
|
|
"EC Number", "PURL", "LSID", "TFClass Schema", "InChI", "w3id", "Handle", "DOI", "ORCID iD", "arXiv"
|
|
};
|
|
|
|
private static final String[] RELATION_TYPES = {
|
|
"IsCitedBy", "Cites", "IsSupplementTo", "IsSupplementedBy", "IsContinuedBy",
|
|
"Continues", "IsDescribedBy", "Describes", "HasMetadata", "IsMetadataFor", "HasVersion",
|
|
"IsVersionOf", "IsNewVersionOf", "IsPreviousVersionOf", "IsPartOf", "HasPart", "IsReferencedBy",
|
|
"References", "IsDocumentedBy", "Documents", "IsCompiledBy", "Compiles", "IsVariantFormOf",
|
|
"IsOriginalFormOf", "IsIdenticalTo", "IsReviewedBy", "Reviews", "IsDerivedFrom", "IsSourceOf",
|
|
"IsRequiredBy", "Requires"
|
|
};
|
|
|
|
private static final String[] RELATION_TYPES_LIMITED = {
|
|
"IsSupplementTo", "Describes", "IsMetadataFor", "IsPartOf"
|
|
};
|
|
|
|
private static final String[] RELATED_RESOURCE_GENERAL_TYPES = {
|
|
"Audiovisual", "Collection", "DataPaper", "Dataset", "Event", "Image", "InteractiveResource",
|
|
"Model", "PhysicalObject", "Service", "Software", "Sound", "Text", "Workflow", "Other"
|
|
};
|
|
|
|
private static final String[] EMBARGO_DATE_TYPES = {
|
|
"Created", "Collected"
|
|
};
|
|
|
|
private static final String[] NAME_IDENTIFIER_SCHEMES = {
|
|
"ORCID", "ISNI", "ROR", "GRID"
|
|
};
|
|
|
|
private static final String[] NAME_IDENTIFIER_SCHEMES_URIS = {
|
|
"https://orcid.org/", "http://www.isni.org/", "https://ror.org/", "https://www.grid.ac/"
|
|
};
|
|
|
|
private static final String[] RESOURCE_GENERAL_TYPES = {
|
|
"literature", "dataset", "software", "other research product"
|
|
};
|
|
|
|
private static final String[] RESOURCE_CONCEPT_URIS = {
|
|
"http://purl.org/coar/resource_type/c_1162", "http://purl.org/coar/resource_type/c_6501",
|
|
"http://purl.org/coar/resource_type/c_545b", "http://purl.org/coar/resource_type/c_b239",
|
|
"http://purl.org/coar/resource_type/c_2df8fbb1", "http://purl.org/coar/resource_type/c_dcae04bc",
|
|
"http://purl.org/coar/resource_type/c_beb9", "http://purl.org/coar/resource_type/c_3e5a",
|
|
"http://purl.org/coar/resource_type/c_ba08", "http://purl.org/coar/resource_type/c_3248",
|
|
"http://purl.org/coar/resource_type/c_2f33", "http://purl.org/coar/resource_type/c_86bc",
|
|
"http://purl.org/coar/resource_type/c_816b", "http://purl.org/coar/resource_type/c_8042",
|
|
"http://purl.org/coar/resource_type/c_71bd", "http://purl.org/coar/resource_type/c_18gh",
|
|
"http://purl.org/coar/resource_type/c_18ws", "http://purl.org/coar/resource_type/c_18hj",
|
|
"http://purl.org/coar/resource_type/c_18op", "http://purl.org/coar/resource_type/c_186u",
|
|
"http://purl.org/coar/resource_type/c_18wq", "http://purl.org/coar/resource_type/c_18wz",
|
|
"http://purl.org/coar/resource_type/c_18ww", "http://purl.org/coar/resource_type/c_efa0",
|
|
"http://purl.org/coar/resource_type/c_baaf", "http://purl.org/coar/resource_type/c_ba1f",
|
|
"http://purl.org/coar/resource_type/c_93fc", "http://purl.org/coar/resource_type/c_15cd",
|
|
"http://purl.org/coar/resource_type/c_18co", "http://purl.org/coar/resource_type/c_18cp",
|
|
"http://purl.org/coar/resource_type/c_6670", "http://purl.org/coar/resource_type/c_5794",
|
|
"http://purl.org/coar/resource_type/c_c94f", "http://purl.org/coar/resource_type/c_f744",
|
|
"http://purl.org/coar/resource_type/c_7a1f", "http://purl.org/coar/resource_type/c_bdcc",
|
|
"http://purl.org/coar/resource_type/c_db06", "http://purl.org/coar/resource_type/c_46ec",
|
|
"http://purl.org/coar/resource_type/c_0857", "http://purl.org/coar/resource_type/c_8544",
|
|
"http://purl.org/coar/resource_type/c_18cf", "http://purl.org/coar/resource_type/c_18cw",
|
|
"http://purl.org/coar/resource_type/c_18cd", "http://purl.org/coar/resource_type/c_18cc",
|
|
"http://purl.org/coar/resource_type/c_12ce", "http://purl.org/coar/resource_type/c_8a7e",
|
|
"http://purl.org/coar/resource_type/c_ecc8", "http://purl.org/coar/resource_type/c_c513",
|
|
"http://purl.org/coar/resource_type/c_12cd", "http://purl.org/coar/resource_type/c_12cc",
|
|
"http://purl.org/coar/resource_type/c_5ce6", "http://purl.org/coar/resource_type/c_ddb1",
|
|
"http://purl.org/coar/resource_type/c_e9a0", "http://purl.org/coar/resource_type/c_7ad9",
|
|
"http://purl.org/coar/resource_type/c_393c", "http://purl.org/coar/resource_type/c_1843"
|
|
};
|
|
|
|
private static final String[] RESOURCE_IDENTIFIER_TYPES = {
|
|
"ARK", "DOI", "Handle", "IGSN", "arXiv", "PURL", "URL", "URN", "PMID"
|
|
};
|
|
|
|
private static final String[] ACCESS_RIGHTS_URIS = {
|
|
"http://purl.org/coar/access_right/c_abf2", "http://purl.org/coar/access_right/c_abf2",
|
|
"http://purl.org/coar/access_right/c_16ec", "http://purl.org/coar/access_right/c_f1cf",
|
|
"http://purl.org/coar/access_right/c_16ec", "http://purl.org/coar/access_right/c_16ec",
|
|
"http://purl.org/coar/access_right/c_16ec", "http://purl.org/coar/access_right/c_14cb"
|
|
};
|
|
|
|
private static final String[] RESOURCE_VERSION_URIS = {
|
|
"http://purl.org/coar/version/c_b1a7d7d4d402bcce", "http://purl.org/coar/version/c_71e4c1898caa6e32",
|
|
"http://purl.org/coar/version/c_ab4af688f83e57aa", "http://purl.org/coar/version/c_fa2ee174bc00049f",
|
|
"http://purl.org/coar/version/c_970fb48d4fbd8a85", "http://purl.org/coar/version/c_e19f295774971610",
|
|
"http://purl.org/coar/version/c_dc82b40f9837b551", "http://purl.org/coar/version/c_be7fb7dd8ff6fe43"
|
|
};
|
|
|
|
private static final String[] RESOURCE_VERSION_LABELS = {
|
|
"AO", "SMUR", "AM", "P", "VoR", "CVoR", "EVoR", "NA"
|
|
};
|
|
|
|
private static final String[] DATE_TYPES = {
|
|
"Accepted", "Available", "Copyrighted", "Collected", "Created", "Issued", "Submitted", "Updated", "Valid"
|
|
};
|
|
|
|
private static final String[] FILE_OBJECT_TYPES = {
|
|
"fulltext", "dataset", "software", "other"
|
|
};
|
|
|
|
private static final String[] AUDIENCE_VOCABULARY = {
|
|
"Administrators", "Community Groups", "Counsellors", "Federal Funds Recipients and Applicants",
|
|
"Librarians", "News Media", "Other", "Parents and Families", "Policymakers", "Researchers",
|
|
"School Support Staff", "Student Financial Aid Providers", "Students", "Teachers"
|
|
};
|
|
|
|
// globally unique identifier
|
|
private static final ElementSpec F1_01D_SPEC = Builders.
|
|
forMandatoryElement("identifier", ONE).
|
|
// withMandatoryAttribute("identifierType", new PIDCheckValuePredicate()).
|
|
withMandatoryAttribute("identifierType", PERSISTENT_IDENTIFIER_TYPES).
|
|
build();
|
|
|
|
// persistent identifier
|
|
private static final ElementSpec F1_02D_SPEC = Builders.
|
|
forMandatoryElement("identifier", ONE).
|
|
withMandatoryAttribute("identifierType", IDENTIFIER_TYPES).
|
|
build();
|
|
|
|
// // To be implemented: In the case of OpenAIRE we suppose it succeeds
|
|
// private static final ElementSpec F4_01M_SPEC = Builders.
|
|
// forMandatoryElement("identifier", ONE).
|
|
// withMandatoryAttribute("identifierType", RESOURCE_IDENTIFIER_TYPES).
|
|
// build();
|
|
|
|
private static final ElementSpec A1_01M_SPEC = Builders.
|
|
forMandatoryElement("rights", ONE_TO_N).
|
|
withMandatoryAttribute("rightsURI", rightsURIList).
|
|
build();
|
|
|
|
// // To be implemented
|
|
// private static final ElementSpec A2_01M_SPEC = Builders.
|
|
// forMandatoryElement("subject", ONE_TO_N).
|
|
// withMandatoryAttribute("subjectScheme").
|
|
// withMandatoryAttribute("schemeURI").
|
|
// build();
|
|
|
|
// // To be implemented
|
|
// private static final ElementSpec I1_01M_SPEC = Builders.
|
|
// forMandatoryElement("subject", ONE_TO_N).
|
|
// build();
|
|
|
|
// // To be implemented
|
|
// // I1_02M Metadata uses semantic resources
|
|
// private static final ElementSpec I1_02M_SPEC = Builders.
|
|
// forMandatoryElement("subject", ONE).
|
|
// build();
|
|
|
|
private static final ElementSpec I3_01M_SPEC_1 = Builders.
|
|
forMandatoryElement("relatedIdentifier", ONE_TO_N).
|
|
withMandatoryAttribute("relatedIdentifierType", IDENTIFIER_TYPES).
|
|
withMandatoryAttribute("relationType", RELATION_TYPES).
|
|
build();
|
|
|
|
// private static final ElementSpec I3_01M_SPEC_2 = Builders.
|
|
// forMandatoryElement("creator", ONE_TO_N).
|
|
// withMandatoryAttribute("creatorName").
|
|
// withMandatoryAttribute("nameIdentifier", NAME_IDENTIFIER_SCHEMES).
|
|
// withMandatoryAttribute("nameIdentifierScheme", NAME_IDENTIFIER_SCHEMES).
|
|
// withMandatoryAttribute("schemeURI", NAME_IDENTIFIER_SCHEMES_URIS).
|
|
// withMandatoryAttribute("affiliation", NAME_IDENTIFIER_SCHEMES).
|
|
// build();
|
|
|
|
// TO BE IMPLEMENTED
|
|
|
|
private static final ElementSpec R1_1_01M_SPEC = Builders.
|
|
forMandatoryElement("rights", ONE_TO_N)
|
|
.atPosition(ElementPosition.FIRST)
|
|
.withMandatoryAttribute("rightsURI") // Not in the rightsURIList HOW TO IMPLEMENT?????
|
|
// .withMandatoryAttribute("rightsURI", rightsURIList) // Not in the rightsURIList HOW TO IMPLEMENT?????
|
|
.build();
|
|
|
|
//TODO: weights for guidelines haven't been finalized. They've been given an arbitrary value of 10.
|
|
public static SyntheticGuideline F1_01D = SyntheticGuideline.of("Data is identified by a persistent identifier", 10, F1_01D_SPEC);
|
|
public static SyntheticGuideline F1_02D = SyntheticGuideline.of("Data is identified by a globally unique identifier", 10, F1_02D_SPEC);
|
|
// public static SyntheticGuideline F4_01M = SyntheticGuideline.of("F4_01M", 10, F4_01M_SPEC);
|
|
public static SyntheticGuideline A1_01M = SyntheticGuideline.of("Metadata contains information to enable the user to get access to the data", 10, A1_01M_SPEC);
|
|
// public static SyntheticGuideline A2_01M = SyntheticGuideline.of("A2_01M", 10, A2_01M_SPEC);
|
|
// public static SyntheticGuideline I1_01M = SyntheticGuideline.of("I1_01M", 10, I1_01M_SPEC);
|
|
// public static SyntheticGuideline I1_02M = SyntheticGuideline.of("I1_02M", 10, I1_02M_SPEC);
|
|
public static SyntheticGuideline I3_01M_1 = SyntheticGuideline.of("Metadata includes references to other metadata", 10, I3_01M_SPEC_1);
|
|
// public static SyntheticGuideline I3_01M_2 = SyntheticGuideline.of("I3_01M_2", 5, I3_01M_SPEC_2);
|
|
// public static SyntheticGuideline R1_01M = SyntheticGuideline.of("R1_01M", 10, R1_01M_SPEC);
|
|
public static SyntheticGuideline R1_1_01M = SyntheticGuideline.of("Metadata includes information about the licence under which the data can be reused", 10, R1_1_01M_SPEC);
|
|
|
|
private static final List<Guideline<Document>> GUIDELINES = Collections.unmodifiableList(
|
|
Arrays.asList(
|
|
F1_01D,
|
|
F1_02D,
|
|
new F2_01M_SPEC(),
|
|
new F3_01M_SPEC(),
|
|
// F4_01M,
|
|
A1_01M,
|
|
// A2_01M,
|
|
// I1_01M,
|
|
// I1_02M,
|
|
new I2_01M_SPEC(),
|
|
I3_01M_1,
|
|
// I3_01M_2,
|
|
// R1_01M,
|
|
new R1_01M_SPEC(),
|
|
R1_1_01M,
|
|
new R1_2_01M_SPEC()
|
|
// new MetadataCompleteness()
|
|
// new I3_01M()
|
|
)
|
|
);
|
|
|
|
private static final Map<String, Guideline> GUIDELINE_MAP = GUIDELINES.
|
|
stream().
|
|
collect(Collectors.toMap(Guideline::getName, (guideline) -> guideline));
|
|
|
|
private static final int MAX_SCORE = GUIDELINES.stream().map(Guideline::getWeight).reduce(0, Integer::sum);
|
|
|
|
public FAIR_Literature_GuidelinesProfile() {
|
|
super("OpenAIRE FAIR Guidelines for Data Repositories Profile");
|
|
}
|
|
|
|
@Override
|
|
public Collection<? extends Guideline<Document>> guidelines() {
|
|
return GUIDELINES;
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @param guidelineName
|
|
* @return
|
|
*/
|
|
@Override
|
|
public Guideline guideline(String guidelineName) {
|
|
return GUIDELINE_MAP.get(guidelineName);
|
|
}
|
|
|
|
@Override
|
|
public int maxScore() {
|
|
return MAX_SCORE;
|
|
}
|
|
}
|
|
|
|
|
|
//// TODO this goes to FAIRProfile
|
|
//class MetadataCompleteness extends AbstractGuideline<Document> {
|
|
//
|
|
// public MetadataCompleteness() {
|
|
// super("MetadataCompleteness", 40);
|
|
// }
|
|
//
|
|
// @Override
|
|
// public Result validate(String id, Document t) {
|
|
// DataArchiveGuidelinesV2Profile profile = new DataArchiveGuidelinesV2Profile();
|
|
//
|
|
// // TODO: iterate over results and build one Guideline.Result
|
|
// try {
|
|
//// System.out.println("Processing MetadataCompleteness...");
|
|
// XMLApplicationProfile.ValidationResult res = profile.validate(id, t);
|
|
// Map<String, Result> results = res.results();
|
|
// int MaxScoreMetadataCompleteness = (int) ((res.score()*getWeight())/100);
|
|
//
|
|
//// System.out.println("Max score DataValidator(%): " + res.score());
|
|
//// System.out.println("Weight FAIRG: " + getWeight());
|
|
//// System.out.println("Max score MetadataCompleteness: " + MaxScoreMetadataCompleteness);
|
|
//// System.out.println("\n\n\n\n");
|
|
//
|
|
//// for (Map.Entry entry : results.entrySet()) {
|
|
//// System.out.println(entry.getKey() + " = " + entry.getValue());
|
|
//// }
|
|
//// System.out.println(score);
|
|
// return getResult(MaxScoreMetadataCompleteness);
|
|
//
|
|
//// System.out.println(tempp.status() + " - " + tempp.score());
|
|
//// String printout = results.entrySet().stream().
|
|
//// map(entry -> entry.getValue() + ": " + entry.getKey()).collect(Collectors.joining("\n"));
|
|
//// System.out.println(printout);
|
|
//
|
|
//// System.out.println("\n\n\n\n");
|
|
// } catch (Exception e) {
|
|
// System.out.println(e.getMessage());
|
|
// System.out.println(e);
|
|
// e.printStackTrace();
|
|
// }
|
|
//
|
|
// return null;
|
|
// }
|
|
//
|
|
// private static Result getResult(int score) {
|
|
// String aa;
|
|
// aa = (score > 0) ? "SUCCESS" : "FAILURE";
|
|
// return new Result() {
|
|
// @Override
|
|
// public int score() {
|
|
// return score;
|
|
// }
|
|
//
|
|
// @Override
|
|
// public Status status() {
|
|
//// return null;
|
|
// return Status.valueOf(aa);
|
|
// }
|
|
//
|
|
// @Override
|
|
// public Iterable<String> warnings() { return null; }
|
|
//
|
|
// @Override
|
|
// public Iterable<String> errors() {
|
|
// return null;
|
|
// }
|
|
//
|
|
// @Override
|
|
// public String internalError() {
|
|
// return null;
|
|
// }
|
|
// };
|
|
// }
|
|
//} |