package eu.dnetlib.dhp.provision.update; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import com.jayway.jsonpath.JsonPath; import eu.dnetlib.dhp.provision.scholix.*; import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.scholexplorer.relation.RelInfo; import eu.dnetlib.scholexplorer.relation.RelationMapper; public class Datacite2Scholix { private String rootPath = "$.attributes"; final RelationMapper relationMapper; public Datacite2Scholix(RelationMapper relationMapper) { this.relationMapper = relationMapper; } public List generateScholixFromJson(final String dJson) { List> relIds = getRelatedIendtifiers(dJson); relIds = relIds != null ? relIds .stream() .filter( m -> m.containsKey("relatedIdentifierType") && m.containsKey("relationType") && m.containsKey("relatedIdentifier")) .collect(Collectors.toList()) : null; if (relIds == null || relIds.size() == 0) return null; final String updated = JsonPath.read(dJson, rootPath + ".updated"); ScholixResource resource = generateDataciteScholixResource(dJson); return relIds .stream() .flatMap( s -> { try { final List result = generateScholix( resource, "" + s.get("relatedIdentifier"), s.get("relatedIdentifierType"), s.get("relationType"), updated); return result.stream(); } catch (Throwable e) { return new ArrayList().stream(); } }) .collect(Collectors.toList()); } public String getRootPath() { return rootPath; } public void setRootPath(String rootPath) { this.rootPath = rootPath; } private List generateScholix( ScholixResource source, final String pid, final String pidtype, final String relType, final String updated) { if ("doi".equalsIgnoreCase(pidtype)) { ScholixResource target = new ScholixResource(); target.setIdentifier(Collections.singletonList(new ScholixIdentifier(pid, pidtype))); final RelInfo relInfo = relationMapper.get(relType.toLowerCase()); final ScholixRelationship rel = new ScholixRelationship(relInfo.getOriginal(), "datacite", relInfo.getInverse()); final ScholixEntityId provider = source.getCollectedFrom().get(0).getProvider(); final Scholix s = new Scholix(); s.setSource(source); s.setTarget(target); s.setLinkprovider(Collections.singletonList(provider)); s.setPublisher(source.getPublisher()); s.setRelationship(rel); s.setPublicationDate(updated); return Collections.singletonList(s); } else { final List result = new ArrayList<>(); ScholixResource target = new ScholixResource(); target.setIdentifier(Collections.singletonList(new ScholixIdentifier(pid, pidtype))); target.setDnetIdentifier(generateId(pid, pidtype, "unknown")); target.setObjectType("unknown"); target.setCollectedFrom(generateDataciteCollectedFrom("incomplete")); final RelInfo relInfo = relationMapper.get(relType.toLowerCase()); final ScholixRelationship rel = new ScholixRelationship(relInfo.getOriginal(), "datacite", relInfo.getInverse()); final ScholixEntityId provider = source.getCollectedFrom().get(0).getProvider(); final Scholix s = new Scholix(); s.setSource(source); s.setTarget(target); s.setLinkprovider(Collections.singletonList(provider)); s.setPublisher(source.getPublisher()); s.setRelationship(rel); s.setPublicationDate(updated); s.generateIdentifier(); result.add(s); final Scholix s2 = new Scholix(); s2.setSource(target); s2.setTarget(source); s2.setLinkprovider(Collections.singletonList(provider)); s2.setPublisher(source.getPublisher()); s2 .setRelationship( new ScholixRelationship(relInfo.getInverse(), "datacite", relInfo.getOriginal())); s2.setPublicationDate(updated); s2.generateIdentifier(); result.add(s2); return result; } } public ScholixResource generateDataciteScholixResource(String dJson) { ScholixResource resource = new ScholixResource(); String DOI_PATH = rootPath + ".doi"; final String doi = JsonPath.read(dJson, DOI_PATH); resource.setIdentifier(Collections.singletonList(new ScholixIdentifier(doi, "doi"))); resource.setObjectType(getType(dJson)); resource.setDnetIdentifier(generateId(doi, "doi", resource.getObjectType())); resource.setCollectedFrom(generateDataciteCollectedFrom("complete")); final String publisher = JsonPath.read(dJson, rootPath + ".publisher"); if (StringUtils.isNotBlank(publisher)) resource.setPublisher(Collections.singletonList(new ScholixEntityId(publisher, null))); final String date = getDate(dJson); if (StringUtils.isNotBlank(date)) resource.setPublicationDate(date); final String title = getTitle(dJson); if (StringUtils.isNotBlank(title)) resource.setTitle(title); resource.setCreator(getCreators(dJson)); return resource; } private List getCreators(final String json) { final List creatorName = JsonPath.read(json, rootPath + ".creators[*].name"); if (creatorName != null && creatorName.size() > 0) { return creatorName .stream() .map(s -> new ScholixEntityId(s, null)) .collect(Collectors.toList()); } return null; } private String getTitle(final String json) { final List titles = JsonPath.read(json, rootPath + ".titles[*].title"); return titles != null && titles.size() > 0 ? titles.get(0) : null; } private String getDate(final String json) { final List> dates = JsonPath.read(json, rootPath + ".dates"); if (dates != null && dates.size() > 0) { List> issued = dates .stream() .filter(s -> "issued".equalsIgnoreCase(s.get("dateType"))) .collect(Collectors.toList()); if (issued.size() > 0) return issued.get(0).get("date"); } return null; } private List generateDataciteCollectedFrom(final String completionStatus) { final ScholixEntityId scholixEntityId = new ScholixEntityId( "Datasets in Datacite", Collections .singletonList( new ScholixIdentifier("dli_________::datacite", "dnet_identifier"))); return Collections .singletonList( new ScholixCollectedFrom(scholixEntityId, "collected", completionStatus)); } private String getType(final String json) { try { final String bibtext = JsonPath.read(json, rootPath + ".types.bibtex"); if ("article".equalsIgnoreCase(bibtext)) { return "publication"; } return "dataset"; } catch (Throwable e) { return "dataset"; } } private List> getRelatedIendtifiers(final String json) { String REL_IDENTIFIER_PATH = rootPath + ".relatedIdentifiers[*]"; List> res = JsonPath.read(json, REL_IDENTIFIER_PATH); return res; } public static String generateId(final String pid, final String pidType, final String entityType) { String type; switch (entityType) { case "publication": type = "50|"; break; case "dataset": type = "60|"; break; case "unknown": type = "70|"; break; default: throw new IllegalArgumentException("unexpected value " + entityType); } return type + DHPUtils .md5( String.format("%s::%s", pid.toLowerCase().trim(), pidType.toLowerCase().trim())); } }