#7911 - first find dataset schematics related to zenodo and extract values from those fields so as to create zenodo relators

This commit is contained in:
Aldo Mihasi 2023-05-17 13:31:37 +03:00
parent 62409ca345
commit 67cc2df382
9 changed files with 279 additions and 114 deletions

View File

@ -4,6 +4,9 @@ import java.util.List;
/**
 * Read access to the configuration values used by the Zenodo deposit code.
 */
public interface ConfigLoader {
/** Returns the configured {@link DOIFunder} entries. */
List<DOIFunder> getDOIFunders();
/**
 * Returns the related-identifier names as plain strings.
 * NOTE(review): presumably one entry per line of relatedIdentifiers.txt — confirm against the implementation.
 */
List<String> getRelatedIdentifiers();
/**
 * Returns the accepted PID type names as plain strings.
 * NOTE(review): presumably one entry per line of acceptedPidTypes.txt — confirm against the implementation.
 */
List<String> getAcceptedPidTypes();
/** Returns the {@link PidFieldNames} holder with the configured pid and pid-type field names. */
PidFieldNames getPidFieldNames();
/** Returns the logo as a byte array. */
byte[] getLogo();
/** Returns the {@link ZenodoConfig} object. */
ZenodoConfig getZenodoConfig();
}

View File

@ -19,11 +19,18 @@ public class ConfigLoaderImpl implements ConfigLoader{
private static final ObjectMapper mapper = new ObjectMapper();
private List<DOIFunder> doiFunders = new ArrayList<>();
private List<String> relatedIdentifiers = new ArrayList<>();
private List<String> acceptedPidTypes = new ArrayList<>();
private PidFieldNames pidFieldNames = new PidFieldNames();
private ZenodoConfig zenodoConfig;
@Autowired
private Environment environment;
/**
 * Creates the loader and stores the injected {@link Environment} in the {@code environment} field.
 *
 * @param environment the environment instance to keep for later use
 */
@Autowired
public ConfigLoaderImpl(Environment environment){
this.environment = environment;
}
@Override
public List<DOIFunder> getDOIFunders() {
if (doiFunders == null || doiFunders.isEmpty()) {
@ -37,6 +44,35 @@ public class ConfigLoaderImpl implements ConfigLoader{
return doiFunders;
}
/**
 * Returns the related-identifier names listed in {@code relatedIdentifiers.txt}, one per line.
 * The file is read the first time the list is requested (or while the cached list is still
 * empty) and the result is kept in the {@code relatedIdentifiers} field afterwards.
 *
 * @return the cached list of related-identifier names; surrounding whitespace is removed and
 *         blank lines are skipped
 */
@Override
public List<String> getRelatedIdentifiers() {
    if (relatedIdentifiers == null || relatedIdentifiers.isEmpty()) {
        // try-with-resources closes the reader and the underlying stream once the lines are
        // collected; the charset is pinned so the result does not depend on the platform default.
        try (BufferedReader ids = new BufferedReader(new InputStreamReader(
                getStreamFromPath("relatedIdentifiers.txt"), java.nio.charset.StandardCharsets.UTF_8))) {
            relatedIdentifiers = ids.lines()
                    .map(String::trim)
                    // a blank or whitespace-only line is not a usable identifier name
                    .filter(line -> !line.isEmpty())
                    .collect(Collectors.toList());
        }
        catch (IOException e) {
            // only close() can raise a checked IOException here; log it like the other loaders do
            logger.error(e.getLocalizedMessage(), e);
        }
    }
    return relatedIdentifiers;
}
/**
 * Returns the PID type names listed in {@code acceptedPidTypes.txt}, one per line.
 * The file is read the first time the list is requested (or while the cached list is still
 * empty) and the result is kept in the {@code acceptedPidTypes} field afterwards.
 *
 * @return the cached list of accepted PID type names; surrounding whitespace is removed and
 *         blank lines are skipped
 */
@Override
public List<String> getAcceptedPidTypes() {
    if (acceptedPidTypes == null || acceptedPidTypes.isEmpty()) {
        // try-with-resources closes the reader and the underlying stream once the lines are
        // collected; the charset is pinned so the result does not depend on the platform default.
        try (BufferedReader ids = new BufferedReader(new InputStreamReader(
                getStreamFromPath("acceptedPidTypes.txt"), java.nio.charset.StandardCharsets.UTF_8))) {
            acceptedPidTypes = ids.lines()
                    .map(String::trim)
                    // a blank or whitespace-only line is not a usable PID type
                    .filter(line -> !line.isEmpty())
                    .collect(Collectors.toList());
        }
        catch (IOException e) {
            // only close() can raise a checked IOException here; log it like the other loaders do
            logger.error(e.getLocalizedMessage(), e);
        }
    }
    return acceptedPidTypes;
}
/**
 * Returns the pid / pid-type field names read from {@code datasetFieldsPid.json}.
 * The file is parsed only while the cached holder is missing or not yet populated, so repeated
 * calls reuse the already loaded instance instead of re-reading the resource every time.
 *
 * @return the cached {@link PidFieldNames}; if parsing fails the error is logged and the
 *         previously held instance is returned unchanged
 */
@Override
public PidFieldNames getPidFieldNames() {
    // The field starts as an empty PidFieldNames, so "not loaded yet" shows up as null names.
    boolean notLoaded = pidFieldNames == null
            || pidFieldNames.getPidName() == null
            || pidFieldNames.getPidTypeName() == null;
    if (notLoaded) {
        try {
            pidFieldNames = mapper.readValue(getStreamFromPath("datasetFieldsPid.json"), PidFieldNames.class);
        }
        catch (IOException e){
            logger.error(e.getLocalizedMessage(), e);
        }
    }
    return pidFieldNames;
}
@Override
public ZenodoConfig getZenodoConfig() {
if (zenodoConfig == null) {

View File

@ -0,0 +1,31 @@
package eu.eudat.depositinterface.zenodorepository.config;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
 * Holder for the two JSON property names ({@code pidName} and {@code pidTypeName}) that tell
 * the mapper under which keys a field value stores its pid and its pid type.
 */
public class PidFieldNames {

    /** Key under which the pid is stored. */
    @JsonProperty("pidName")
    private String pidName;

    /** Key under which the pid type is stored. */
    @JsonProperty("pidTypeName")
    private String pidTypeName;

    /** Creates an empty holder; both names stay {@code null} until set. */
    public PidFieldNames() {}

    /**
     * Creates a holder with both names set.
     *
     * @param pidName     key under which the pid is stored
     * @param pidTypeName key under which the pid type is stored
     */
    public PidFieldNames(String pidName, String pidTypeName) {
        this.pidName = pidName;
        this.pidTypeName = pidTypeName;
    }

    /** @return the key under which the pid is stored */
    public String getPidName() {
        return this.pidName;
    }

    /** @param pidName the key under which the pid is stored */
    public void setPidName(String pidName) {
        this.pidName = pidName;
    }

    /** @return the key under which the pid type is stored */
    public String getPidTypeName() {
        return this.pidTypeName;
    }

    /** @param pidTypeName the key under which the pid type is stored */
    public void setPidTypeName(String pidTypeName) {
        this.pidTypeName = pidTypeName;
    }
}

View File

@ -55,7 +55,7 @@ public class ZenodoDeposit implements RepositoryDeposit {
HttpHeaders headers = new HttpHeaders();
headers.setAccept(Collections.singletonList(MediaType.APPLICATION_JSON));
headers.setContentType(MediaType.APPLICATION_JSON);
eu.eudat.depositinterface.zenodorepository.models.ZenodoDeposit deposit = DMPToZenodoMapper.fromDMP(dmpDepositModel, "argos", "ARGOS", "https://argos.openaire.eu/", this.configLoader.getDOIFunders());
eu.eudat.depositinterface.zenodorepository.models.ZenodoDeposit deposit = DMPToZenodoMapper.fromDMP(dmpDepositModel);
HttpEntity<eu.eudat.depositinterface.zenodorepository.models.ZenodoDeposit> request = new HttpEntity<>(deposit, headers);
Map createResponse;

View File

@ -3,21 +3,126 @@ package eu.eudat.depositinterface.zenodorepository.mapper;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.eudat.depositinterface.enums.FieldType;
import eu.eudat.depositinterface.models.*;
import eu.eudat.depositinterface.zenodorepository.config.ConfigLoader;
import eu.eudat.depositinterface.zenodorepository.config.DOIFunder;
import eu.eudat.depositinterface.zenodorepository.config.PidFieldNames;
import eu.eudat.depositinterface.zenodorepository.models.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.env.Environment;
import org.springframework.stereotype.Component;
import java.time.Instant;
import java.util.*;
import java.util.stream.Collectors;
@Component
public class DMPToZenodoMapper {
private static final Logger logger = LoggerFactory.getLogger(DMPToZenodoMapper.class);
private static final ObjectMapper objectMapper = new ObjectMapper();
public static ZenodoDeposit fromDMP(DMPDepositModel dmp, String zenodoCommunity, String zenodoAffiliation, String domain, List<DOIFunder> doiFunders) throws JsonProcessingException {
private static ConfigLoader configLoader;
private static Environment environment;
private static PidFieldNames pidFieldNames;
/**
 * Stores the injected collaborators in the class-level static fields so that the static
 * mapping methods can reach them, and loads the pid field names once up front.
 *
 * @param configL loader that supplies the mapper's configuration
 * @param env     environment used for property lookups
 */
@Autowired
public DMPToZenodoMapper(ConfigLoader configL, Environment env){
    DMPToZenodoMapper.configLoader = configL;
    DMPToZenodoMapper.pidFieldNames = configL.getPidFieldNames();
    DMPToZenodoMapper.environment = env;
}
/**
 * Selects the fields whose schematics list contains the given related-identifier name.
 *
 * @param relatedId the schematic name to look for
 * @param fields    the dataset fields to scan
 * @return a new list with the matching fields, in their original order
 */
private static List<DatasetFieldsDepositModel> findSchemanticValues(String relatedId, List<DatasetFieldsDepositModel> fields){
    List<DatasetFieldsDepositModel> matching = new ArrayList<>();
    for (DatasetFieldsDepositModel candidate : fields) {
        if (candidate.getSchematics().contains(relatedId)) {
            matching.add(candidate);
        }
    }
    return matching;
}
/**
 * Collects the distinct identifier values carried by the given fields.
 * How a value is read depends on the field's render style:
 * plain-text-like fields contribute their raw value, combo boxes contribute the selected
 * value(s), reference-data fields contribute the pid of every entry whose pid type is accepted,
 * organization/researcher fields contribute the {@code reference} entry and dataset-identifier
 * fields contribute the {@code identifier} entry.
 * Missing ({@code null}) or empty identifiers are never added, so callers do not end up
 * building relators without an identifier.
 *
 * @param fields           the fields to read values from
 * @param acceptedPidTypes pid types whose pids may be used as identifiers
 * @return the set of extracted identifier values, possibly empty
 * @throws JsonProcessingException if a structured field value is not valid JSON of the expected shape
 */
private static Set<String> extractSchemanticValues(List<DatasetFieldsDepositModel> fields, List<String> acceptedPidTypes) throws JsonProcessingException{
    Set<String> values = new HashSet<>();
    for(DatasetFieldsDepositModel field: fields){
        String value = (String) field.getValue();
        if(value == null || value.isEmpty()) {
            continue;
        }
        FieldType fieldType = FieldType.fromName(field.getRenderStyleType());
        if (fieldType == null) {
            // switching on null would throw a NullPointerException; an unknown style carries nothing usable
            continue;
        }
        switch (fieldType) {
            case FREE_TEXT:
            case TEXT_AREA:
            case RICH_TEXT_AREA:
            case RADIO_BOX:
            case DATE_PICKER:
                values.add(value);
                break;
            case COMBO_BOX:
                if (field.isMultiple()) {
                    List<String> selected = objectMapper.readValue(value, new TypeReference<List<String>>() {});
                    for (String s : selected) {
                        addIfPresent(values, s);
                    }
                }
                else {
                    values.add(value);
                }
                break;
            case REGISTRIES:
            case SERVICES:
            case EXTERNAL_DATASETS:
            case DATA_REPOSITORIES:
            case PUB_REPOSITORIES:
            case JOURNAL_REPOSITORIES:
            case TAXONOMIES:
            case PUBLICATIONS:
                for (Map<String, String> entry : readSelectedEntries(value, field.isMultiple())) {
                    String pid = entry.get(pidFieldNames.getPidName());
                    String pidType = entry.get(pidFieldNames.getPidTypeName());
                    // only pids of an accepted type may be used as identifiers
                    if (acceptedPidTypes.contains(pidType)) {
                        addIfPresent(values, pid);
                    }
                }
                break;
            case ORGANIZATIONS:
            case RESEARCHERS:
                for (Map<String, String> entry : readSelectedEntries(value, field.isMultiple())) {
                    addIfPresent(values, entry.get("reference"));
                }
                break;
            case DATASET_IDENTIFIER:
                // always a single JSON object, regardless of the "multiple" flag
                for (Map<String, String> entry : readSelectedEntries(value, false)) {
                    addIfPresent(values, entry.get("identifier"));
                }
                break;
            default:
                // other render styles do not carry identifier values
                break;
        }
    }
    return values;
}

/**
 * Parses a structured field value into its entries: a single JSON object when {@code multiple}
 * is false, otherwise a JSON array of strings where each string is itself a JSON object.
 */
private static List<Map<String, String>> readSelectedEntries(String value, boolean multiple) throws JsonProcessingException {
    List<Map<String, String>> entries = new ArrayList<>();
    if (multiple) {
        List<String> selected = objectMapper.readValue(value, new TypeReference<List<String>>() {});
        for (String s : selected) {
            entries.add(objectMapper.readValue(s, new TypeReference<Map<String, String>>() {}));
        }
    }
    else {
        entries.add(objectMapper.readValue(value, new TypeReference<Map<String, String>>() {}));
    }
    return entries;
}

/** Adds the candidate to the target set unless it is {@code null} or empty. */
private static void addIfPresent(Set<String> target, String candidate) {
    if (candidate != null && !candidate.isEmpty()) {
        target.add(candidate);
    }
}
public static ZenodoDeposit fromDMP(DMPDepositModel dmp) throws JsonProcessingException {
Map<String, Object> extraProperties = dmp.getExtraProperties() != null ? new org.json.JSONObject(dmp.getExtraProperties()).toMap() : new HashMap<>();
ZenodoDeposit deposit = new ZenodoDeposit();
@ -31,115 +136,21 @@ public class DMPToZenodoMapper {
List<String> keywords = new ArrayList<>();
List<String> references = new ArrayList<>();
//objectMapper.enable(JsonParser.Feature.ALLOW_SINGLE_QUOTES);
List<String> acceptedPidTypes = configLoader.getAcceptedPidTypes();
for(DatasetDepositModel dataset: dmp.getDatasets()){
for(DatasetFieldsDepositModel field: dataset.getFields()){
for(String schematic: field.getSchematics()){
if(schematic.contains("zenodo.")){
if(field.getValue() != null && !field.getValue().isEmpty()){
if(schematic.contains("related_identifiers")){
if(schematic.endsWith("relation")){
Optional<ZenodoRelator> relatorOptional = relatedIdentifiers.stream().filter(r -> r.getRelation() == null).findAny();
if(relatorOptional.isPresent()){
relatorOptional.get().setRelation(field.getValue());
}
else{
ZenodoRelator relator = new ZenodoRelator();
relator.setRelation(field.getValue());
relatedIdentifiers.add(relator);
}
}
else if(schematic.endsWith("identifier")){
Optional<ZenodoRelator> relatorOptional = relatedIdentifiers.stream().filter(r -> r.getIdentifier() == null).findAny();
if(relatorOptional.isPresent()){
relatorOptional.get().setIdentifier(field.getValue());
}
else{
ZenodoRelator relator = new ZenodoRelator();
relator.setIdentifier(field.getValue());
relatedIdentifiers.add(relator);
}
}
else{
JsonNode relatorNodes = objectMapper.readTree(field.getValue());
if(relatorNodes.isArray()){
List<ZenodoRelator> relators = objectMapper.readValue(field.getValue(), new TypeReference<List<ZenodoRelator>>(){});
relatedIdentifiers.addAll(relators);
}
else{
ZenodoRelator relator = objectMapper.readValue(field.getValue(), ZenodoRelator.class);
relatedIdentifiers.add(relator);
}
}
}
else if(schematic.contains("communities")){
JsonNode communitiesNodes = objectMapper.readTree(field.getValue());
if(communitiesNodes.isArray()){
List<ZenodoComunity> communitiesList = objectMapper.readValue(field.getValue(), new TypeReference<List<ZenodoComunity>>(){});
communities.addAll(communitiesList);
}
else{
ZenodoComunity community = objectMapper.readValue(field.getValue(), ZenodoComunity.class);
communities.add(community);
}
}
else if(schematic.contains("contributors")){
JsonNode contributorsNodes = objectMapper.readTree(field.getValue());
if(contributorsNodes.isArray()){
List<ZenodoContributor> contributorsList = objectMapper.readValue(field.getValue(), new TypeReference<List<ZenodoContributor>>(){});
contributors.addAll(contributorsList);
}
else{
ZenodoContributor contributor = objectMapper.readValue(field.getValue(), ZenodoContributor.class);
contributors.add(contributor);
}
}
else if(schematic.contains("creators")){
JsonNode creatorsNodes = objectMapper.readTree(field.getValue());
if(creatorsNodes.isArray()){
List<ZenodoCreator> creatorsList = objectMapper.readValue(field.getValue(), new TypeReference<List<ZenodoCreator>>(){});
creators.addAll(creatorsList);
}
else{
ZenodoCreator creator = objectMapper.readValue(field.getValue(), ZenodoCreator.class);
creators.add(creator);
}
}
else if(schematic.contains("grants")){
JsonNode grantsNodes = objectMapper.readTree(field.getValue());
if(grantsNodes.isArray()){
List<ZenodoGrant> grantsList = objectMapper.readValue(field.getValue(), new TypeReference<List<ZenodoGrant>>(){});
grants.addAll(grantsList);
}
else{
ZenodoGrant grant = objectMapper.readValue(field.getValue(), ZenodoGrant.class);
grants.add(grant);
}
}
else if(schematic.contains("keywords")){
if(field.getValue().startsWith("[")){
List<String> keywordsList = objectMapper.readValue(field.getValue(), new TypeReference<List<String>>(){});
keywords.addAll(keywordsList);
}
else{
keywords.add(field.getValue());
}
}
else if(schematic.contains("references")){
if(field.getValue().startsWith("[")){
List<String> referencesList = objectMapper.readValue(field.getValue(), new TypeReference<List<String>>(){});
references.addAll(referencesList);
}
else{
references.add(field.getValue());
}
}
else{
schematicsMap.put(schematic.replace("zenodo.", ""), field.getValue());
}
}
}
for(String relatedId: configLoader.getRelatedIdentifiers()){
List<DatasetFieldsDepositModel> fields = findSchemanticValues(relatedId, dataset.getFields());
Set<String> values = extractSchemanticValues(fields, acceptedPidTypes);
for(String value: values){
ZenodoRelator relator = new ZenodoRelator();
relator.setRelation(relatedId.substring(relatedId.lastIndexOf(".") + 1));
relator.setIdentifier(value);
relatedIdentifiers.add(relator);
}
}
}
schematicsMap.put("related_identifiers", relatedIdentifiers);
@ -160,7 +171,8 @@ public class DMPToZenodoMapper {
deposit.getMetadata().setPublicationType("datamanagementplan");
deposit.getMetadata().setDescription((dmp.getDescription() != null && !dmp.getDescription().isEmpty() ? dmp.getDescription() : "<p></p>"));
deposit.getMetadata().setVersion(String.valueOf(dmp.getVersion()));
if(zenodoCommunity != null && !zenodoAffiliation.isEmpty()) {
String zenodoCommunity = environment.getProperty("zenodo.community");
if(zenodoCommunity != null && !zenodoCommunity.isEmpty()) {
ZenodoComunity community = new ZenodoComunity();
community.setIdentifier(zenodoCommunity);
deposit.getMetadata().getCommunities().add(community);
@ -188,10 +200,11 @@ public class DMPToZenodoMapper {
}
if (dmp.isPublic()) {
ZenodoRelator relator = new ZenodoRelator();
relator.setIdentifier(domain + "/external/zenodo/" + dmp.getId().toString());
relator.setIdentifier(environment.getProperty("zenodo.domain") + "/external/zenodo/" + dmp.getId().toString());
relator.setRelation("isIdenticalTo");
deposit.getMetadata().getRelatedIdentifiers().add(relator);
}
String zenodoAffiliation = environment.getProperty("zenodo.affiliation");
List<ZenodoContributor> contributors1 = dmp.getUsers().stream().map(userDMP -> {
ZenodoContributor contributor = new ZenodoContributor();
contributor.setName(userDMP.getUser().getName());
@ -229,7 +242,7 @@ public class DMPToZenodoMapper {
String grantReferenceHead = dmp.getGrant().getReference().split(":")[0];
if (grantReferenceHead.equals("openaire")) {
String grantReferenceTail = dmp.getGrant().getReference().split(":")[3];
DOIFunder doiFunder = doiFunders.stream()
DOIFunder doiFunder = configLoader.getDOIFunders().stream()
.filter(doiFunder1 -> dmp.getGrant().getFunder().getLabel().contains(doiFunder1.getFunder()) || doiFunder1.getFunder().contains(dmp.getGrant().getFunder().getLabel()))
.findFirst().orElse(null);
if (doiFunder != null) {

View File

@ -0,0 +1,43 @@
actrn
ark
bibcode
nct
drks
doi
euctr
data.europa.eu
epo_id
GRID
gsk
GeoPass
GBIF
hal
handle
isrctn
ichushi
ISNI
jprn
mag_id
NAID
NCID
oai
orcid_pending
orcid
OrgPeg
PANGAEA
PIC
epo_nr_epodoc
pdb
pmc
pmid
RNSR
ROR
RRID
UNKNOWN
VIAF
who
arXiv
info:eu-repo/dai
orcidworkid
urn
w3id

View File

@ -2,3 +2,6 @@ configuration.doi_funder=DOI_Funder.json
configuration.zenodo.logo=${CONFIGURATION_LOGO_ZENODO}
storage.temp=${STORAGE_TMP_ZENODO}
configuration.zenodo=${CONFIGURATION_ZENODO}
zenodo.community=argos
zenodo.affiliation=ARGOS
zenodo.domain=https://argos.openaire.eu/

View File

@ -0,0 +1,4 @@
{
"pidName": "pid",
"pidTypeName": "pidTypeField"
}

View File

@ -0,0 +1,32 @@
zenodo.related_identifiers.isCitedBy
zenodo.related_identifiers.cites
zenodo.related_identifiers.isSupplementTo
zenodo.related_identifiers.isSupplementedBy
zenodo.related_identifiers.isContinuedBy
zenodo.related_identifiers.continues
zenodo.related_identifiers.isDescribedBy
zenodo.related_identifiers.describes
zenodo.related_identifiers.hasMetadata
zenodo.related_identifiers.isMetadataFor
zenodo.related_identifiers.isNewVersionOf
zenodo.related_identifiers.isPreviousVersionOf
zenodo.related_identifiers.isPartOf
zenodo.related_identifiers.hasPart
zenodo.related_identifiers.isReferencedBy
zenodo.related_identifiers.references
zenodo.related_identifiers.isDocumentedBy
zenodo.related_identifiers.documents
zenodo.related_identifiers.isCompiledBy
zenodo.related_identifiers.compiles
zenodo.related_identifiers.isVariantFormOf
zenodo.related_identifiers.isOriginalFormof
zenodo.related_identifiers.isIdenticalTo
zenodo.related_identifiers.isAlternateIdentifier
zenodo.related_identifiers.isReviewedBy
zenodo.related_identifiers.reviews
zenodo.related_identifiers.isDerivedFrom
zenodo.related_identifiers.isSourceOf
zenodo.related_identifiers.requires
zenodo.related_identifiers.isRequiredBy
zenodo.related_identifiers.isObsoletedBy
zenodo.related_identifiers.obsoletes