[EOSC DUMP] extension to the schema to add the organization affiliated to the result
This commit is contained in:
parent
608122fe67
commit
ae10ae9793
|
@ -20,9 +20,19 @@ public class EoscResult extends CommunityResult {
|
||||||
@JsonSchema(description = "The subject dumped by type associated to the result")
|
@JsonSchema(description = "The subject dumped by type associated to the result")
|
||||||
private Map<String, List<Subject>> subject;
|
private Map<String, List<Subject>> subject;
|
||||||
|
|
||||||
// public EoscResult() {
|
@JsonSchema(description = "The list of keywords associated to the result")
|
||||||
// super();
|
private List<String> keywords;
|
||||||
// }
|
|
||||||
|
@JsonSchema(description = "The list of organizations the result is affiliated to")
|
||||||
|
private List<Organization> affiliation;
|
||||||
|
|
||||||
|
public List<String> getKeywords() {
|
||||||
|
return keywords;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setKeywords(List<String> keywords) {
|
||||||
|
this.keywords = keywords;
|
||||||
|
}
|
||||||
|
|
||||||
public EoscInteroperabilityFramework getEoscIF() {
|
public EoscInteroperabilityFramework getEoscIF() {
|
||||||
return eoscIF;
|
return eoscIF;
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
package eu.dnetlib.dhp.eosc.model;
|
||||||
|
|
||||||
|
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 13/09/22
|
||||||
|
*/
|
||||||
|
public class Organization implements Serializable {
|
||||||
|
@JsonSchema(description = "the OpenAIRE id of the organizaiton")
|
||||||
|
private String id;
|
||||||
|
|
||||||
|
@JsonSchema(description = "the name of the organization")
|
||||||
|
private String name;
|
||||||
|
|
||||||
|
@JsonSchema(description = "the list of pids we have in OpenAIRE for the organization")
|
||||||
|
private List<OrganizationPid> pid ;
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(String id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setName(String name) {
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<OrganizationPid> getPid() {
|
||||||
|
return pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPid(List<OrganizationPid> pid) {
|
||||||
|
this.pid = pid;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,35 @@
|
||||||
|
package eu.dnetlib.dhp.eosc.model;
|
||||||
|
|
||||||
|
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 13/09/22
|
||||||
|
*/
|
||||||
|
public class OrganizationPid implements Serializable {
|
||||||
|
|
||||||
|
@JsonSchema(description = "the type of the organization pid")
|
||||||
|
private String type;
|
||||||
|
|
||||||
|
@JsonSchema(description = "the value of the organization pid")
|
||||||
|
private String value;
|
||||||
|
|
||||||
|
|
||||||
|
public String getType() {
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setType(String type) {
|
||||||
|
this.type = type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getValue() {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setValue(String value) {
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
}
|
|
@ -109,17 +109,6 @@
|
||||||
"type" : "string",
|
"type" : "string",
|
||||||
"description" : "Only for results with type 'software': the URL to the repository with the source code"
|
"description" : "Only for results with type 'software': the URL to the repository with the source code"
|
||||||
},
|
},
|
||||||
"collectedfrom" : {
|
|
||||||
"description" : "Information about the sources from which the record has been collected",
|
|
||||||
"type" : "array",
|
|
||||||
"items" : {
|
|
||||||
"allOf" : [ {
|
|
||||||
"$ref" : "#/definitions/CfHbKeyValue"
|
|
||||||
}, {
|
|
||||||
"description" : "Information about the sources from which the record has been collected"
|
|
||||||
} ]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"contactgroup" : {
|
"contactgroup" : {
|
||||||
"description" : "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
|
"description" : "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
|
||||||
"type" : "array",
|
"type" : "array",
|
||||||
|
@ -318,6 +307,8 @@
|
||||||
"description" : "Geolocation information"
|
"description" : "Geolocation information"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"keywords": {
|
||||||
|
},
|
||||||
"id" : {
|
"id" : {
|
||||||
"type" : "string",
|
"type" : "string",
|
||||||
"description" : "The OpenAIRE identifiers for this result"
|
"description" : "The OpenAIRE identifiers for this result"
|
||||||
|
@ -380,13 +371,7 @@
|
||||||
},
|
},
|
||||||
"description" : "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative."
|
"description" : "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative."
|
||||||
},
|
},
|
||||||
"collectedfrom" : {
|
|
||||||
"allOf" : [ {
|
|
||||||
"$ref" : "#/definitions/CfHbKeyValue"
|
|
||||||
}, {
|
|
||||||
"description" : "Information about the source from which the record has been collected"
|
|
||||||
} ]
|
|
||||||
},
|
|
||||||
"hostedby" : {
|
"hostedby" : {
|
||||||
"allOf" : [ {
|
"allOf" : [ {
|
||||||
"$ref" : "#/definitions/CfHbKeyValue"
|
"$ref" : "#/definitions/CfHbKeyValue"
|
||||||
|
@ -570,19 +555,6 @@
|
||||||
"description" : "See definition of Dublin Core field dc:source"
|
"description" : "See definition of Dublin Core field dc:source"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"subjects" : {
|
|
||||||
"description" : "Keywords associated to the result",
|
|
||||||
"type" : "array",
|
|
||||||
"items" : {
|
|
||||||
"type" : "object",
|
|
||||||
"properties" : {
|
|
||||||
"provenance" : {
|
|
||||||
"allOf" : [ {
|
|
||||||
"$ref" : "#/definitions/Provenance"
|
|
||||||
}, {
|
|
||||||
"description" : "Why this subject is associated to the result"
|
|
||||||
} ]
|
|
||||||
},
|
|
||||||
"subject" : {
|
"subject" : {
|
||||||
"type" : "object",
|
"type" : "object",
|
||||||
"properties" : {
|
"properties" : {
|
||||||
|
|
|
@ -60,7 +60,7 @@ class GenerateJsonSchema {
|
||||||
SchemaGenerator generator = new SchemaGenerator(config);
|
SchemaGenerator generator = new SchemaGenerator(config);
|
||||||
JsonNode jsonSchema = generator.generateSchema(EoscResult.class);
|
JsonNode jsonSchema = generator.generateSchema(EoscResult.class);
|
||||||
|
|
||||||
System.out.println(new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(jsonSchema));
|
System.out.println(new ObjectMapper().writeValueAsString(jsonSchema));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -231,7 +232,44 @@ public class ResultMapper implements Serializable {
|
||||||
|
|
||||||
out.setSubjects(subjectList);
|
out.setSubjects(subjectList);
|
||||||
} else {
|
} else {
|
||||||
((EoscResult) out).setSubject(createSubjectMap(input));
|
|
||||||
|
if (Optional.ofNullable(input.getSubject()).isPresent()) {
|
||||||
|
((EoscResult) out).setSubject(createSubjectMap(input));
|
||||||
|
((EoscResult) out)
|
||||||
|
.setKeywords(
|
||||||
|
input
|
||||||
|
.getSubject()
|
||||||
|
.stream()
|
||||||
|
.filter(
|
||||||
|
s -> s.getQualifier().getClassid().equalsIgnoreCase("keyword") &&
|
||||||
|
!s.getValue().equalsIgnoreCase("EOSC::RO-crate"))
|
||||||
|
.map(s -> s.getValue())
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
long eoscSubjectNumber = input
|
||||||
|
.getSubject()
|
||||||
|
.stream()
|
||||||
|
.filter(s -> s.getValue().equalsIgnoreCase("EOSC::RO-crate"))
|
||||||
|
.count();
|
||||||
|
if (eoscSubjectNumber > 1) {
|
||||||
|
throw new CardinalityTooHighException(
|
||||||
|
"EOSC IF in the result has cardinality greater than one. Change dump!");
|
||||||
|
}
|
||||||
|
if (eoscSubjectNumber == 1) {
|
||||||
|
StructuredProperty ifra = input
|
||||||
|
.getSubject()
|
||||||
|
.stream()
|
||||||
|
.filter(s -> s.getValue().equalsIgnoreCase("EOSC::RO-crate"))
|
||||||
|
.findFirst()
|
||||||
|
.get();
|
||||||
|
((EoscResult) out)
|
||||||
|
.setEoscIF(
|
||||||
|
EoscInteroperabilityFramework
|
||||||
|
.newInstance(
|
||||||
|
ifra.getValue(), ifra.getValue(), "",
|
||||||
|
"compliesWith"));
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
out.setType(input.getResulttype().getClassid());
|
out.setType(input.getResulttype().getClassid());
|
||||||
|
@ -341,21 +379,24 @@ public class ResultMapper implements Serializable {
|
||||||
private static Map<String, List<eu.dnetlib.dhp.eosc.model.Subject>> createSubjectMap(
|
private static Map<String, List<eu.dnetlib.dhp.eosc.model.Subject>> createSubjectMap(
|
||||||
eu.dnetlib.dhp.schema.oaf.Result input) {
|
eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
Map<String, List<eu.dnetlib.dhp.eosc.model.Subject>> map = new HashMap<>();
|
Map<String, List<eu.dnetlib.dhp.eosc.model.Subject>> map = new HashMap<>();
|
||||||
if (!Optional.ofNullable(input.getSubject()).isPresent())
|
|
||||||
return map;
|
|
||||||
input.getSubject().stream().forEach(s -> {
|
|
||||||
String key = s.getQualifier().getClassid();
|
|
||||||
if (!map.containsKey(key) && !(key.equals("fos") || key.equals("sdg"))) {
|
|
||||||
|
|
||||||
map.put(key, new ArrayList<>());
|
input.getSubject().stream().forEach(s -> {
|
||||||
|
String key = s.getQualifier().getClassid().toLowerCase();
|
||||||
|
if (!key.equalsIgnoreCase("http://www.abs.gov.au/ausstats/abs@.nsf/0/6BB427AB9696C225CA2574180004463E") &&
|
||||||
|
!key.equalsIgnoreCase("keyword") &&
|
||||||
|
!key.equalsIgnoreCase("eosc")) {
|
||||||
|
if (!map.containsKey(key)) {
|
||||||
|
|
||||||
|
map.put(key, new ArrayList<>());
|
||||||
|
}
|
||||||
|
eu.dnetlib.dhp.eosc.model.Subject subject = new eu.dnetlib.dhp.eosc.model.Subject();
|
||||||
|
subject.setValue(s.getValue());
|
||||||
|
Provenance p = getProvenance(s);
|
||||||
|
if (p != null) {
|
||||||
|
subject.setProvenance(p);
|
||||||
|
}
|
||||||
|
map.get(key).add(subject);
|
||||||
}
|
}
|
||||||
eu.dnetlib.dhp.eosc.model.Subject subject = new eu.dnetlib.dhp.eosc.model.Subject();
|
|
||||||
subject.setValue(s.getValue());
|
|
||||||
Provenance p = getProvenance(s);
|
|
||||||
if (p != null) {
|
|
||||||
subject.setProvenance(p);
|
|
||||||
}
|
|
||||||
map.get(key).add(subject);
|
|
||||||
});
|
});
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
|
@ -80,7 +80,14 @@ public class SelectEoscResultsJobStep1 implements Serializable {
|
||||||
.readPath(spark, inputPath, inputClazz)
|
.readPath(spark, inputPath, inputClazz)
|
||||||
.filter(
|
.filter(
|
||||||
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible()
|
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible()
|
||||||
&& r.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
|
&& (r.getContext().stream().anyMatch(c -> c.getId().equals("eosc")) ||
|
||||||
|
Optional
|
||||||
|
.ofNullable(r.getSubject())
|
||||||
|
.map(
|
||||||
|
s -> s
|
||||||
|
.stream()
|
||||||
|
.anyMatch(sbj -> sbj.getValue().equalsIgnoreCase("EOSC::RO-crate")))
|
||||||
|
.orElse(false)))
|
||||||
.map(
|
.map(
|
||||||
(MapFunction<R, EoscResult>) r -> (EoscResult) ResultMapper
|
(MapFunction<R, EoscResult>) r -> (EoscResult) ResultMapper
|
||||||
.map(r, communityMap, Constants.DUMPTYPE.EOSC.getType()),
|
.map(r, communityMap, Constants.DUMPTYPE.EOSC.getType()),
|
||||||
|
|
|
@ -314,7 +314,7 @@
|
||||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
|
<arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/tar/orp</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/tar/otherresearchproduct</arg>
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
<arg>--dumpType</arg><arg>eosc</arg>
|
<arg>--dumpType</arg><arg>eosc</arg>
|
||||||
</spark>
|
</spark>
|
||||||
|
|
Loading…
Reference in New Issue