// AriadnePlus/dnet-ariadneplus-publisher/src/main/java/eu/dnetlib/ariadneplus/rdf/ResourceReader.java
// (repository viewer header: 461 lines, 19 KiB, Java)

package eu.dnetlib.ariadneplus.rdf;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import eu.dnetlib.ariadneplus.CRM;
import eu.dnetlib.ariadneplus.CRMpe;
import eu.dnetlib.ariadneplus.catalogue.CatalogueLicense;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.jena.assembler.AssemblerHelp;
import org.apache.jena.rdf.model.*;
import org.apache.jena.vocabulary.RDF;
import org.apache.jena.vocabulary.RDFS;
import org.springframework.stereotype.Component;
/**
 * Reads values out of Jena {@link Resource}s using the CRM / CRMpe vocabularies and a few
 * ad-hoc properties, returning them as plain strings (labels, literals or URIs).
 *
 * <p>Most methods return a lazily transformed view over a Jena {@link StmtIterator}: the
 * statements are listed when the method is called, but each value is only computed when the
 * returned iterator is advanced.
 *
 * Created by Alessia Bardi on 11/12/2017.
 *
 * @author Alessia Bardi
 */
@Component
public class ResourceReader {

    private static final Log log = LogFactory.getLog(ResourceReader.class);

    // The SPARQL template for services includes an <availability> field that is not explicit in the model.
    private final Property availabilityProperty = ResourceFactory.createProperty("", "availability");
    // The SPARQL template for services includes an <activitytype> field that is not explicit in the model.
    private final Property activitytypeProperty = ResourceFactory.createProperty("", "activitytype");
    // The SPARQL template for datasets includes an <encoding> field that is generated from the creation event.
    private final Property encodingProperty = ResourceFactory.createProperty("", "encoding");
    // The SPARQL template for datasets includes a <used_software> field that is generated from the creation event.
    private final Property usedSoftwareProperty = ResourceFactory.createProperty("", "used_software");
    // The SPARQL template for software includes a <usedby> field that is generated from the creation event
    // of the resource using the software.
    private final Property usedByProperty = ResourceFactory.createProperty("", "usedby");
    // The SPARQL template for datasets includes a <creator> field that is generated from the creation event.
    private final Property creatorProperty = ResourceFactory.createProperty("", "creator");
    // The SPARQL template for software includes a <creationtime> field that is generated from the creation event.
    private final Property creationtimeProperty = ResourceFactory.createProperty("", "creationtime");

    /**
     * Returns the title of the resource.
     *
     * @param resource the resource to read
     * @return the lexical form of the crm:P102_has_title literal if present, otherwise the
     *         result of {@link #getLabel(Resource)}
     */
    public String getTitle(final Resource resource) {
        final Statement titleStatement = resource.getProperty(CRM.P102_has_title);
        if (titleStatement != null) {
            final RDFNode title = titleStatement.getObject();
            if (title.isLiteral()) {
                return title.asLiteral().getLexicalForm();
            }
        }
        // If we do not find a literal crm:P102_has_title, fall back to the label.
        return getLabel(resource);
    }

    /**
     * Returns the rdfs:label of the resource.
     *
     * @param resource the resource to read; may be {@code null}
     * @return the label, or the empty string if the resource is {@code null} or has no rdfs:label
     */
    public String getLabel(final Resource resource) {
        if (resource == null) {
            return "";
        }
        final Statement label = resource.getProperty(RDFS.label);
        // NOTE(review): the previous code called replace("'", "\'") on the label. In Java "\'" is just
        // an apostrophe, so that call never changed the string and has been dropped. If backslash
        // escaping was intended it would have to be "\\'" - confirm with the consumers first.
        return label == null ? "" : label.getString();
    }

    /**
     * Returns the values of the crm:P3_has_note statements of the resource.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the note strings
     */
    public Iterator<String> getDescriptions(final Resource resource) {
        // NOTE(review): a no-op replace("'", "\'") used to be applied here too; see getLabel.
        return objectStrings(resource, CRM.P3_has_note);
    }

    /**
     * Returns the values of the CRMpe:PP45_has_competency statements of the resource.
     * NOTE: based on the SPARQL templates competency is a string, in the model it is a PE36_Competency_Type.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the competency strings
     */
    public Iterator<String> getCompetences(final Resource resource) {
        return objectStrings(resource, CRMpe.PP45_has_competency);
    }

    /**
     * Returns the value of the ad-hoc {@code availability} property.
     *
     * @param resource the resource to read
     * @return the availability string, or the empty string if the property is absent
     */
    public String getAvailability(final Resource resource) {
        final Statement availability = resource.getProperty(availabilityProperty);
        return availability == null ? "" : availability.getString();
    }

    /**
     * Returns the note of the first crm:P16_used_specific_object value that is typed as
     * crm:E30_Right and has a crm:P3_has_note. If that object also has a crm:P2_has_type, the
     * label of the type is appended in square brackets.
     *
     * @param resource the resource to read
     * @return the rights string, or the empty string if no matching object is found
     */
    public String getConditionOfUse(final Resource resource) {
        final StmtIterator it = resource.listProperties(CRM.P16_used_specific_object);
        try {
            while (it.hasNext()) {
                final Resource obj = it.next().getResource();
                if (obj.hasProperty(RDF.type, CRM.E30_Right) && obj.hasProperty(CRM.P3_has_note)) {
                    String rightsString = obj.getProperty(CRM.P3_has_note).getString();
                    if (obj.hasProperty(CRM.P2_has_type)) {
                        final Resource rightType = obj.getPropertyResourceValue(CRM.P2_has_type);
                        rightsString += " [" + getLabel(rightType) + "]";
                    }
                    return rightsString;
                }
            }
            return "";
        } finally {
            // We may return before the iterator is exhausted, so release it explicitly.
            it.close();
        }
    }

    /**
     * Maps the label of the crm:P2_has_type of a crm:P16_used_specific_object value to a
     * {@link CatalogueLicense}.
     *
     * @param resource the resource to read
     * @return the matching license, or {@link CatalogueLicense#NotSpecified} if the resource has no
     *         resource-valued crm:P16_used_specific_object or that object has no crm:P2_has_type
     */
    public CatalogueLicense getCatalogueLicense(final Resource resource) {
        // getPropertyResourceValue yields null both when the property is absent and when its
        // value is not a resource, so a single null check covers both cases.
        final Resource obj = resource.getPropertyResourceValue(CRM.P16_used_specific_object);
        if (obj != null && obj.hasProperty(CRM.P2_has_type)) {
            final String license = getLabel(obj.getPropertyResourceValue(CRM.P2_has_type));
            return CatalogueLicense.getCatalogueLicenseFor(license);
        }
        return CatalogueLicense.NotSpecified;
    }

    /**
     * Returns the local names of the rdf:type values of the resource.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the local names
     */
    public Iterator<String> getRDFClassNames(final Resource resource) {
        final StmtIterator types = resource.listProperties(RDF.type);
        return Iterators.transform(types, s -> s.getResource().getLocalName());
    }

    /**
     * Returns the values of the ad-hoc {@code activitytype} property.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the activity type strings
     */
    public Iterator<String> getActivityTypes(final Resource resource) {
        return objectStrings(resource, activitytypeProperty);
    }

    /**
     * Returns the titles (see {@link #getTitle(Resource)}) of the CRMpe:PP2_provided_by values
     * followed by those of the CRMpe:PP25_has_maintaining_RI values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the provider titles
     */
    public Iterator<String> getProviderNames(final Resource resource) {
        return Iterators.transform(providerStatements(resource), s -> getTitle(s.getResource()));
    }

    /**
     * Returns the URIs of the CRMpe:PP2_provided_by values followed by those of the
     * CRMpe:PP25_has_maintaining_RI values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the provider URIs
     */
    public Iterator<String> getProviderUris(final Resource resource) {
        return Iterators.transform(providerStatements(resource), s -> s.getResource().getURI());
    }

    /**
     * For each CRMpe:PP2_provided_by value, returns the label of its crm:P76_has_contact_point.
     *
     * @param resource the resource to read
     * @return a lazy iterator with one entry per provider; the entry is the empty string when the
     *         provider has no (resource-valued, labelled) contact point
     */
    public Iterator<String> getProviderContactPoints(final Resource resource) {
        final StmtIterator providers = resource.listProperties(CRMpe.PP2_provided_by);
        // getLabel maps a missing (null) contact point to "", so no explicit hasProperty check is needed.
        return Iterators.transform(providers,
                s -> getLabel(s.getResource().getPropertyResourceValue(CRM.P76_has_contact_point)));
    }

    /**
     * Returns the URIs of the crm:P76_has_contact_point values of the resource itself.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the contact point URIs
     */
    public Iterator<String> getResourceDirectContactPointsURI(final Resource resource) {
        return objectUris(resource, CRM.P76_has_contact_point);
    }

    /**
     * Returns the URIs of the CRMpe:PP4_hosts_object values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the hosted object URIs
     */
    public Iterator<String> getHostedStuff(final Resource resource) {
        // In inference we trust: only PP4 is read. The sub-properties PP6_hosts_digital_object,
        // PP7_hosts_software_object and PP8_hosts_dataset are deliberately not listed here.
        return objectUris(resource, CRMpe.PP4_hosts_object);
    }

    /**
     * Returns the URIs of the CRMpe:PP4i_is_object_hosted_by values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the host URIs
     */
    public Iterator<String> getHostedBys(final Resource resource) {
        // In inference we trust: only PP4i is read. PP6i_is_digital_object_hosted_by,
        // PP7i_is_software_object_hosted_by and PP8i_is_dataset_hosted_by are deliberately not listed here.
        return objectUris(resource, CRMpe.PP4i_is_object_hosted_by);
    }

    /**
     * Returns the URIs of the CRMpe:PP32_curates values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the curated object URIs
     */
    public Iterator<String> getCuratedObjects(final Resource resource) {
        // In inference we trust: only PP32 is read. PP11_curates_volatile_digital_object,
        // PP12_curates_volatile_software and PP13_curates_volatile_dataset are deliberately not listed here.
        return objectUris(resource, CRMpe.PP32_curates);
    }

    /**
     * Returns the URIs of the CRMpe:PP32i_is_curated_by values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the curator URIs
     */
    public Iterator<String> getCuratorUrls(final Resource resource) {
        // In inference we trust: only PP32i is read. PP11i_is_volatile_digital_object_curated_by,
        // PP12i_is_volatile_software_curated_by and PP13i_is_volatile_dataset_curated_by are
        // deliberately not listed here.
        return objectUris(resource, CRMpe.PP32i_is_curated_by);
    }

    /**
     * Returns the curation plans (see {@link #getCurationPlans(Resource)}) of every
     * CRMpe:PP32i_is_curated_by value, concatenated in listing order.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the curation plan URIs of all curators
     */
    public Iterator<String> getResourceCuratorCurationPlans(final Resource resource) {
        // In inference we trust: only PP32i is read.
        final StmtIterator curators = resource.listProperties(CRMpe.PP32i_is_curated_by);
        return Iterators.concat(Iterators.transform(curators, c -> getCurationPlans(c.getResource())));
    }

    /**
     * Returns the URIs of the CRMpe:PP15_delivers_on_request values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the URIs
     */
    public Iterator<String> getDeliversOnRequest(final Resource resource) {
        return objectUris(resource, CRMpe.PP15_delivers_on_request);
    }

    /**
     * Returns the URIs of the CRMpe:PP14_runs_on_request values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the URIs
     */
    public Iterator<String> getRunsOnRequest(final Resource resource) {
        return objectUris(resource, CRMpe.PP14_runs_on_request);
    }

    /**
     * Returns the URIs of the CRMpe:PP28_has_designated_access_point values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the access point URIs
     */
    public Iterator<String> getAccessPoints(final Resource resource) {
        return objectUris(resource, CRMpe.PP28_has_designated_access_point);
    }

    /**
     * Returns the values of the CRMpe:PP42_has_declarative_time statements.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the declarative time strings
     */
    public Iterator<String> getDeclarativeTimes(final Resource resource) {
        return objectStrings(resource, CRMpe.PP42_has_declarative_time);
    }

    /**
     * Returns the labels of the CRMpe:PP29_uses_access_protocol values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the protocol labels
     */
    public Iterator<String> getProtocols(final Resource resource) {
        return objectLabels(resource, CRMpe.PP29_uses_access_protocol);
    }

    /**
     * Returns the URIs of the CRMpe:PP31_uses_curation_plan values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the curation plan URIs
     */
    public Iterator<String> getCurationPlans(final Resource resource) {
        return objectUris(resource, CRMpe.PP31_uses_curation_plan);
    }

    /**
     * Returns the URIs of the crm:P107_has_current_or_former_member values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the member URIs
     */
    public Iterator<String> getMemberUrls(final Resource resource) {
        return objectUris(resource, CRM.P107_has_current_or_former_member);
    }

    /**
     * Returns the URIs of the crm:P107i_is_current_or_former_member_of values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the URIs of the groups the resource is a member of
     */
    public Iterator<String> isMemberOf(final Resource resource) {
        return objectUris(resource, CRM.P107i_is_current_or_former_member_of);
    }

    /**
     * Returns the URIs of the CRMpe:PP2i_provides values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the provided service URIs
     */
    public Iterator<String> getProvidedServiceUrls(final Resource resource) {
        return objectUris(resource, CRMpe.PP2i_provides);
    }

    /**
     * Returns one string per crm:P2_has_type statement: the literal value when the type is a
     * literal, otherwise the label of the type resource.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the type strings
     */
    public Iterator<String> getHasTypeLabels(final Resource resource) {
        final StmtIterator types = resource.listProperties(CRM.P2_has_type);
        return Iterators.transform(types, s -> {
            final RDFNode type = s.getObject();
            if (type.isLiteral()) {
                return type.asLiteral().getString();
            }
            return getLabel(s.getResource());
        });
    }

    /**
     * Returns the URIs of the crm:P106i_forms_part_of values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the URIs
     */
    public Iterator<String> getIsPartOfUrls(final Resource resource) {
        // In inference we trust: P106i is read instead of CRMpe.PP23i_is_dataset_part_of.
        return objectUris(resource, CRM.P106i_forms_part_of);
    }

    /**
     * Returns the URIs of the crm:P106_is_composed_of values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the URIs
     */
    public Iterator<String> getHasPartUrls(final Resource resource) {
        // In inference we trust: P106 is read instead of CRMpe.PP23_has_dataset_part.
        return objectUris(resource, CRM.P106_is_composed_of);
    }

    /**
     * Returns the labels of the crm:P129_is_about values that are not typed as crm:E4_Period,
     * crm:E52_Time_Span or crm:E53_Place (those are reported as coverages instead).
     *
     * @param resource the resource to read
     * @return a lazy iterator over the subject labels
     */
    public Iterator<String> getSubjects(final Resource resource) {
        final StmtIterator abouts = resource.listProperties(CRM.P129_is_about);
        return Iterators.transform(
                Iterators.filter(abouts, s -> !isCoverage(s.getResource())),
                s -> getLabel(s.getResource()));
    }

    /**
     * Returns the temporal coverages of the resource: the labels of the crm:P129_is_about values
     * typed as crm:E4_Period, followed by the crm:P82_at_some_time_within values of those typed
     * as crm:E52_Time_Span.
     *
     * @param resource the resource to read
     * @return a new mutable list of coverage strings
     */
    public List<String> getTemporalCoverages(final Resource resource) {
        final List<String> temporalCoverages = getCoverages(resource, CRM.E4_Period);
        temporalCoverages.addAll(getCoverageTimeSpan(resource));
        return temporalCoverages;
    }

    /**
     * Returns the labels of the crm:P129_is_about values typed as crm:E53_Place.
     *
     * @param resource the resource to read
     * @return a new mutable list of place labels
     */
    public List<String> getSpatialCoverages(final Resource resource) {
        return getCoverages(resource, CRM.E53_Place);
    }

    /**
     * Returns the values of the ad-hoc {@code encoding} property.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the encoding strings
     */
    public Iterator<String> getEncodings(final Resource resource) {
        return objectStrings(resource, encodingProperty);
    }

    /**
     * Returns the URIs of the values of the ad-hoc {@code creator} property.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the creator URIs
     */
    public Iterator<String> getCreatorsURIs(final Resource resource) {
        return objectUris(resource, creatorProperty);
    }

    /**
     * Returns the URIs of the CRMpe:PP39i_has_metadata values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the metadata URIs
     */
    public Iterator<String> getMetadata(final Resource resource) {
        return objectUris(resource, CRMpe.PP39i_has_metadata);
    }

    /**
     * Returns the URIs of the CRMpe:PP39_is_metadata_for values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the described dataset URIs
     */
    public Iterator<String> getDescribedDataset(final Resource resource) {
        return objectUris(resource, CRMpe.PP39_is_metadata_for);
    }

    /**
     * Returns the URIs of the CRMpe:PP1_currently_offers values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the offered service URIs
     */
    public Iterator<String> getOfferedServiceUrls(final Resource resource) {
        return objectUris(resource, CRMpe.PP1_currently_offers);
    }

    /**
     * Returns the crm:P82a_begin_of_the_begin value of the crm:P4_has_time_span of the resource.
     *
     * @param resource the resource to read
     * @return the start time string, or the empty string if the time span or its begin is missing
     */
    public String getStartTime(final Resource resource) {
        final Resource timespan = resource.getPropertyResourceValue(CRM.P4_has_time_span);
        if (timespan != null) {
            final Statement begin = timespan.getProperty(CRM.P82a_begin_of_the_begin);
            if (begin != null) {
                return begin.getString();
            }
        }
        return "";
    }

    /**
     * Returns the first listed value of the ad-hoc {@code creationtime} property.
     *
     * @param resource the resource to read
     * @return the creation time string, or the empty string if the property is absent
     */
    public String getFirstCreationTime(final Resource resource) {
        final StmtIterator it = resource.listProperties(creationtimeProperty);
        try {
            return it.hasNext() ? it.next().getString() : "";
        } finally {
            // Only the first statement is consumed, so release the iterator explicitly.
            it.close();
        }
    }

    /**
     * Returns the URIs of the CRMpe:PP44_has_maintaining_team values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the maintainer URIs
     */
    public Iterator<String> getMaintainerUrls(final Resource resource) {
        return objectUris(resource, CRMpe.PP44_has_maintaining_team);
    }

    /**
     * Collects the direct contact point URIs (see
     * {@link #getResourceDirectContactPointsURI(Resource)}) of every CRMpe:PP44_has_maintaining_team value.
     *
     * @param resource the resource to read
     * @return a new mutable list of contact point URIs
     */
    public List<String> getMaintainerContacts(final Resource resource) {
        final List<String> contacts = Lists.newArrayList();
        final StmtIterator maintainers = resource.listProperties(CRMpe.PP44_has_maintaining_team);
        while (maintainers.hasNext()) {
            final Resource maintainer = maintainers.next().getResource();
            final Iterator<String> maintainerContacts = getResourceDirectContactPointsURI(maintainer);
            while (maintainerContacts.hasNext()) {
                contacts.add(maintainerContacts.next());
            }
        }
        return contacts;
    }

    /**
     * Returns the labels of the CRMpe:PP44_has_maintaining_team values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the maintainer labels
     */
    public Iterator<String> getMaintainersLabels(final Resource resource) {
        return objectLabels(resource, CRMpe.PP44_has_maintaining_team);
    }

    /**
     * Returns the URIs of the CRMpe:PP44i_is_maintaining_team_of values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the maintained resource URIs
     */
    public Iterator<String> getMaintainedUrls(final Resource resource) {
        return objectUris(resource, CRMpe.PP44i_is_maintaining_team_of);
    }

    /**
     * Returns the URIs of the CRMpe:PP17_has_snapshot values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the snapshot URIs
     */
    public Iterator<String> getSnapshots(final Resource resource) {
        return objectUris(resource, CRMpe.PP17_has_snapshot);
    }

    /**
     * Returns the URIs of the CRMpe:PP17i_is_snapshot_of values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the URIs
     */
    public Iterator<String> getIsSnapshotOfs(final Resource resource) {
        return objectUris(resource, CRMpe.PP17i_is_snapshot_of);
    }

    /**
     * Returns the URIs of the CRMpe:PP22_has_release values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the release URIs
     */
    public Iterator<String> getHasReleases(final Resource resource) {
        return objectUris(resource, CRMpe.PP22_has_release);
    }

    /**
     * Returns the URIs of the CRMpe:PP22i_is_release_of values.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the URIs
     */
    public Iterator<String> getIsReleaseOfs(final Resource resource) {
        return objectUris(resource, CRMpe.PP22i_is_release_of);
    }

    /**
     * Returns the URIs of the values of the ad-hoc {@code used_software} property.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the software URIs
     */
    public Iterator<String> getUsedSoftware(final Resource resource) {
        return objectUris(resource, usedSoftwareProperty);
    }

    /**
     * Returns the URIs of the values of the ad-hoc {@code usedby} property.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the URIs
     */
    public Iterator<String> getUsedBy(final Resource resource) {
        return objectUris(resource, usedByProperty);
    }

    /**
     * Returns the values of the crm:P72_has_language statements as strings.
     * NOTE: languages are resources in CRM but our SPARQL template makes them strings by taking the label.
     *
     * @param resource the resource to read
     * @return a lazy iterator over the language strings
     */
    public Iterator<String> getLanguages(final Resource resource) {
        return objectStrings(resource, CRM.P72_has_language);
    }

    /**
     * Finds the most specific type of res.
     *
     * <p>When several candidate types are found, crm:E29_Design_or_Procedure is preferred if it is
     * among them; otherwise the first candidate in the set's iteration order is chosen.
     *
     * @param res Resource you want to find the most specific type
     * @param fallbackType Resource representing the type to return if no specific type is found
     * @return Resource: the most specific type, if any. fallbackType otherwise
     */
    public Resource findSpecificType(final Resource res, final Resource fallbackType) {
        final Set<Resource> types = AssemblerHelp.findSpecificTypes(res, fallbackType);
        if (types == null || types.isEmpty()) {
            log.warn("No specific type found. Returning the fallback type: " + fallbackType);
            // Return here: falling through would dereference a possibly-null set.
            return fallbackType;
        }
        if (types.size() == 1) {
            return types.iterator().next();
        }
        log.warn("Found more than one possible specific type");
        types.forEach(t -> log.warn(t));
        if (types.contains(CRM.E29_Design_or_Procedure)) {
            log.warn("CRM.E29_Design_or_Procedure always wins");
            return CRM.E29_Design_or_Procedure;
        }
        log.warn("Choosing the first");
        return types.iterator().next();
    }

    /** Lists the labels of the crm:P129_is_about values that have the given rdf:type. */
    private List<String> getCoverages(final Resource resource, final Resource coverageType) {
        final List<String> coverages = Lists.newArrayList();
        final StmtIterator abouts = resource.listProperties(CRM.P129_is_about);
        while (abouts.hasNext()) {
            final Resource about = abouts.next().getResource();
            if (about.hasProperty(RDF.type, coverageType)) {
                coverages.add(getLabel(about));
            }
        }
        return coverages;
    }

    /**
     * Lists the string form of the crm:P82_at_some_time_within values of the crm:P129_is_about
     * values typed as crm:E52_Time_Span.
     */
    private List<String> getCoverageTimeSpan(final Resource resource) {
        final List<String> coverages = Lists.newArrayList();
        final StmtIterator abouts = resource.listProperties(CRM.P129_is_about);
        while (abouts.hasNext()) {
            final Resource about = abouts.next().getResource();
            if (about.hasProperty(RDF.type, CRM.E52_Time_Span)) {
                final StmtIterator times = about.listProperties(CRM.P82_at_some_time_within);
                while (times.hasNext()) {
                    // NOTE(review): getResource() presumably fails if the time value is a literal - confirm
                    // that P82 values are always resources in the input data.
                    final Resource time = times.next().getResource();
                    coverages.add(time.toString());
                }
            }
        }
        return coverages;
    }

    /** Tells whether the resource is typed as a period, a time span or a place. */
    private boolean isCoverage(final Resource r) {
        return r.hasProperty(RDF.type, CRM.E4_Period)
                || r.hasProperty(RDF.type, CRM.E52_Time_Span)
                || r.hasProperty(RDF.type, CRM.E53_Place);
    }

    /** Lists the PP2_provided_by statements followed by the PP25_has_maintaining_RI statements. */
    private Iterator<Statement> providerStatements(final Resource resource) {
        final StmtIterator providedBy = resource.listProperties(CRMpe.PP2_provided_by);
        final StmtIterator maintainingRI = resource.listProperties(CRMpe.PP25_has_maintaining_RI);
        return Iterators.concat(providedBy, maintainingRI);
    }

    /** Lazily maps each statement of the given property to the URI of its object resource. */
    private Iterator<String> objectUris(final Resource resource, final Property property) {
        final StmtIterator statements = resource.listProperties(property);
        return Iterators.transform(statements, s -> s.getResource().getURI());
    }

    /** Lazily maps each statement of the given property to the string value of its object. */
    private Iterator<String> objectStrings(final Resource resource, final Property property) {
        final StmtIterator statements = resource.listProperties(property);
        return Iterators.transform(statements, s -> s.getString());
    }

    /** Lazily maps each statement of the given property to the rdfs:label of its object resource. */
    private Iterator<String> objectLabels(final Resource resource, final Property property) {
        final StmtIterator statements = resource.listProperties(property);
        return Iterators.transform(statements, s -> getLabel(s.getResource()));
    }
}