[EOSC DUMP] extension to the schema to add the organization affiliated to the result

This commit is contained in:
Miriam Baglioni 2022-09-13 12:10:47 +02:00
parent 608122fe67
commit ae10ae9793
8 changed files with 161 additions and 51 deletions

View File

@ -20,9 +20,19 @@ public class EoscResult extends CommunityResult {
@JsonSchema(description = "The subject dumped by type associated to the result")
private Map<String, List<Subject>> subject;
// public EoscResult() {
// super();
// }
// Keyword values attached to the result; the description below is emitted into the generated JSON schema.
@JsonSchema(description = "The list of keywords associated to the result")
private List<String> keywords;
@JsonSchema(description = "The list of organizations the result is affiliated to")
private List<Organization> affiliation;
/**
 * @return the keywords currently stored on this result (the field value as is, possibly {@code null})
 */
public List<String> getKeywords() {
	return this.keywords;
}

/**
 * Replaces the keywords stored on this result.
 *
 * @param keywords the new list of keywords; stored by reference, no copy is made
 */
public void setKeywords(List<String> keywords) {
	this.keywords = keywords;
}
public EoscInteroperabilityFramework getEoscIF() {
return eoscIF;

View File

@ -0,0 +1,45 @@
package eu.dnetlib.dhp.eosc.model;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import java.io.Serializable;
import java.util.List;
/**
 * Organization a result is affiliated to, as described in the EOSC dump model.
 * Plain serializable bean carrying the OpenAIRE identifier, the name and the list
 * of persistent identifiers of the organization.
 *
 * @author miriam.baglioni
 * @Date 13/09/22
 */
public class Organization implements Serializable {
	@JsonSchema(description = "the OpenAIRE id of the organization")
	private String id;

	@JsonSchema(description = "the name of the organization")
	private String name;

	@JsonSchema(description = "the list of pids we have in OpenAIRE for the organization")
	private List<OrganizationPid> pid;

	/**
	 * @return the OpenAIRE id of the organization
	 */
	public String getId() {
		return id;
	}

	/**
	 * @param id the OpenAIRE id of the organization
	 */
	public void setId(String id) {
		this.id = id;
	}

	/**
	 * @return the name of the organization
	 */
	public String getName() {
		return name;
	}

	/**
	 * @param name the name of the organization
	 */
	public void setName(String name) {
		this.name = name;
	}

	/**
	 * @return the pids of the organization (the stored list itself, not a copy)
	 */
	public List<OrganizationPid> getPid() {
		return pid;
	}

	/**
	 * @param pid the pids of the organization; stored by reference
	 */
	public void setPid(List<OrganizationPid> pid) {
		this.pid = pid;
	}
}

View File

@ -0,0 +1,35 @@
package eu.dnetlib.dhp.eosc.model;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import java.io.Serializable;
/**
 * Persistent identifier of an organization, expressed as a (type, value) pair of strings.
 *
 * @author miriam.baglioni
 * @Date 13/09/22
 */
public class OrganizationPid implements Serializable {

	@JsonSchema(description = "the type of the organization pid")
	private String type;

	@JsonSchema(description = "the value of the organization pid")
	private String value;

	/**
	 * @return the type of the pid
	 */
	public String getType() {
		return this.type;
	}

	/**
	 * @param type the type of the pid
	 */
	public void setType(String type) {
		this.type = type;
	}

	/**
	 * @return the value of the pid
	 */
	public String getValue() {
		return this.value;
	}

	/**
	 * @param value the value of the pid
	 */
	public void setValue(String value) {
		this.value = value;
	}
}

View File

@ -109,17 +109,6 @@
"type" : "string",
"description" : "Only for results with type 'software': the URL to the repository with the source code"
},
"collectedfrom" : {
"description" : "Information about the sources from which the record has been collected",
"type" : "array",
"items" : {
"allOf" : [ {
"$ref" : "#/definitions/CfHbKeyValue"
}, {
"description" : "Information about the sources from which the record has been collected"
} ]
}
},
"contactgroup" : {
"description" : "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
"type" : "array",
@ -318,6 +307,8 @@
"description" : "Geolocation information"
}
},
"keywords": {
},
"id" : {
"type" : "string",
"description" : "The OpenAIRE identifiers for this result"
@ -380,13 +371,7 @@
},
"description" : "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative."
},
"collectedfrom" : {
"allOf" : [ {
"$ref" : "#/definitions/CfHbKeyValue"
}, {
"description" : "Information about the source from which the record has been collected"
} ]
},
"hostedby" : {
"allOf" : [ {
"$ref" : "#/definitions/CfHbKeyValue"
@ -570,19 +555,6 @@
"description" : "See definition of Dublin Core field dc:source"
}
},
"subjects" : {
"description" : "Keywords associated to the result",
"type" : "array",
"items" : {
"type" : "object",
"properties" : {
"provenance" : {
"allOf" : [ {
"$ref" : "#/definitions/Provenance"
}, {
"description" : "Why this subject is associated to the result"
} ]
},
"subject" : {
"type" : "object",
"properties" : {

View File

@ -60,7 +60,7 @@ class GenerateJsonSchema {
SchemaGenerator generator = new SchemaGenerator(config);
JsonNode jsonSchema = generator.generateSchema(EoscResult.class);
System.out.println(new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(jsonSchema));
System.out.println(new ObjectMapper().writeValueAsString(jsonSchema));
}
@Test

View File

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
@ -231,7 +232,44 @@ public class ResultMapper implements Serializable {
out.setSubjects(subjectList);
} else {
((EoscResult) out).setSubject(createSubjectMap(input));
if (Optional.ofNullable(input.getSubject()).isPresent()) {
((EoscResult) out).setSubject(createSubjectMap(input));
((EoscResult) out)
.setKeywords(
input
.getSubject()
.stream()
.filter(
s -> s.getQualifier().getClassid().equalsIgnoreCase("keyword") &&
!s.getValue().equalsIgnoreCase("EOSC::RO-crate"))
.map(s -> s.getValue())
.collect(Collectors.toList()));
long eoscSubjectNumber = input
.getSubject()
.stream()
.filter(s -> s.getValue().equalsIgnoreCase("EOSC::RO-crate"))
.count();
if (eoscSubjectNumber > 1) {
throw new CardinalityTooHighException(
"EOSC IF in the result has cardinality greater than one. Change dump!");
}
if (eoscSubjectNumber == 1) {
StructuredProperty ifra = input
.getSubject()
.stream()
.filter(s -> s.getValue().equalsIgnoreCase("EOSC::RO-crate"))
.findFirst()
.get();
((EoscResult) out)
.setEoscIF(
EoscInteroperabilityFramework
.newInstance(
ifra.getValue(), ifra.getValue(), "",
"compliesWith"));
}
}
}
out.setType(input.getResulttype().getClassid());
@ -341,21 +379,24 @@ public class ResultMapper implements Serializable {
private static Map<String, List<eu.dnetlib.dhp.eosc.model.Subject>> createSubjectMap(
eu.dnetlib.dhp.schema.oaf.Result input) {
Map<String, List<eu.dnetlib.dhp.eosc.model.Subject>> map = new HashMap<>();
if (!Optional.ofNullable(input.getSubject()).isPresent())
return map;
input.getSubject().stream().forEach(s -> {
String key = s.getQualifier().getClassid();
if (!map.containsKey(key) && !(key.equals("fos") || key.equals("sdg"))) {
map.put(key, new ArrayList<>());
input.getSubject().stream().forEach(s -> {
String key = s.getQualifier().getClassid().toLowerCase();
if (!key.equalsIgnoreCase("http://www.abs.gov.au/ausstats/abs@.nsf/0/6BB427AB9696C225CA2574180004463E") &&
!key.equalsIgnoreCase("keyword") &&
!key.equalsIgnoreCase("eosc")) {
if (!map.containsKey(key)) {
map.put(key, new ArrayList<>());
}
eu.dnetlib.dhp.eosc.model.Subject subject = new eu.dnetlib.dhp.eosc.model.Subject();
subject.setValue(s.getValue());
Provenance p = getProvenance(s);
if (p != null) {
subject.setProvenance(p);
}
map.get(key).add(subject);
}
eu.dnetlib.dhp.eosc.model.Subject subject = new eu.dnetlib.dhp.eosc.model.Subject();
subject.setValue(s.getValue());
Provenance p = getProvenance(s);
if (p != null) {
subject.setProvenance(p);
}
map.get(key).add(subject);
});
return map;
}

View File

@ -80,7 +80,14 @@ public class SelectEoscResultsJobStep1 implements Serializable {
.readPath(spark, inputPath, inputClazz)
.filter(
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible()
&& r.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
&& (r.getContext().stream().anyMatch(c -> c.getId().equals("eosc")) ||
Optional
.ofNullable(r.getSubject())
.map(
s -> s
.stream()
.anyMatch(sbj -> sbj.getValue().equalsIgnoreCase("EOSC::RO-crate")))
.orElse(false)))
.map(
(MapFunction<R, EoscResult>) r -> (EoscResult) ResultMapper
.map(r, communityMap, Constants.DUMPTYPE.EOSC.getType()),

View File

@ -314,7 +314,7 @@
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/tar/orp</arg>
<arg>--outputPath</arg><arg>${workingDir}/tar/otherresearchproduct</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
<arg>--dumpType</arg><arg>eosc</arg>
</spark>