dnet-hadoop/dhp-workflows/dhp-graph-provision-scholex.../src/main/java/eu/dnetlib/dhp/provision/update/Datacite2Scholix.java

230 lines
7.3 KiB
Java

package eu.dnetlib.dhp.provision.update;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.provision.scholix.*;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.scholexplorer.relation.RelInfo;
import eu.dnetlib.scholexplorer.relation.RelationMapper;
/**
 * Builds {@link Scholix} links from a Datacite JSON record.
 *
 * <p>All fields are read with JsonPath expressions rooted at {@link #getRootPath()} (default
 * {@code "$.attributes"}). One link (or a direct/inverse pair) is produced for every usable entry
 * of the record's {@code relatedIdentifiers} array.
 */
public class Datacite2Scholix {

    private String rootPath = "$.attributes";

    final RelationMapper relationMapper;

    /**
     * @param relationMapper lookup from lower-cased Datacite relation type to its {@link RelInfo}
     */
    public Datacite2Scholix(RelationMapper relationMapper) {
        this.relationMapper = relationMapper;
    }

    /**
     * Generates the Scholix links described by the {@code relatedIdentifiers} of a Datacite record.
     *
     * <p>Entries lacking a non-null {@code relatedIdentifierType}, {@code relationType} or
     * {@code relatedIdentifier} are ignored. An entry whose conversion fails with a runtime
     * exception is skipped and does not abort the other entries.
     *
     * @param dJson the Datacite record as a JSON string
     * @return the generated links, or {@code null} when the record has no usable related identifier
     *         (the {@code null} result is kept for backward compatibility with existing callers)
     */
    public List<Scholix> generateScholixFromJson(final String dJson) {
        final List<Map<String, String>> declared = getRelatedIdentifiers(dJson);
        if (declared == null) {
            return null;
        }
        final List<Map<String, String>> relIds = declared
            .stream()
            .filter(Datacite2Scholix::isCompleteRelation)
            .collect(Collectors.toList());
        if (relIds.isEmpty()) {
            return null;
        }

        // NOTE(review): with JsonPath's default configuration a missing ".updated" is expected to
        // raise an exception here rather than yield null — confirm this is the intended contract.
        final String updated = JsonPath.read(dJson, rootPath + ".updated");
        final ScholixResource resource = generateDataciteScholixResource(dJson);

        return relIds
            .stream()
            .flatMap(rel -> toScholix(resource, rel, updated).stream())
            .collect(Collectors.toList());
    }

    public String getRootPath() {
        return rootPath;
    }

    public void setRootPath(String rootPath) {
        this.rootPath = rootPath;
    }

    /**
     * Tells whether a related-identifier entry carries a non-null value for each of the three
     * fields needed to build a link. A key that is present but mapped to JSON {@code null} does not
     * count: it would otherwise be turned into the literal identifier {@code "null"}.
     */
    private static boolean isCompleteRelation(final Map<String, ?> rel) {
        return rel != null
            && rel.get("relatedIdentifierType") != null
            && rel.get("relationType") != null
            && rel.get("relatedIdentifier") != null;
    }

    /**
     * Converts one related-identifier entry, returning an empty list instead of failing when the
     * entry is malformed.
     */
    private List<Scholix> toScholix(
        final ScholixResource source,
        final Map<String, String> rel,
        final String updated) {
        try {
            return generateScholix(
                source,
                "" + rel.get("relatedIdentifier"),
                rel.get("relatedIdentifierType"),
                rel.get("relationType"),
                updated);
        } catch (RuntimeException ignored) {
            // Best effort: a single bad entry (e.g. a value of unexpected type) must not discard
            // the links of the whole record. Errors (OutOfMemoryError, ...) are deliberately NOT
            // caught so that real JVM failures are not hidden.
            return Collections.emptyList();
        }
    }

    /**
     * Builds the link(s) from {@code source} to the entity identified by {@code pid}.
     *
     * @return a single link when {@code pidtype} is a DOI, a direct and an inverse link otherwise,
     *         or an empty list when the identifier is blank or the relation type is not mapped
     */
    private List<Scholix> generateScholix(
        ScholixResource source,
        final String pid,
        final String pidtype,
        final String relType,
        final String updated) {
        if (StringUtils.isBlank(pid) || pidtype == null || relType == null) {
            return Collections.emptyList();
        }
        // Locale.ROOT keeps the lookup key independent of the JVM default locale.
        final RelInfo relInfo = relationMapper.get(relType.toLowerCase(Locale.ROOT));
        if (relInfo == null) {
            return Collections.emptyList();
        }

        final ScholixResource target = new ScholixResource();
        target.setIdentifier(Collections.singletonList(new ScholixIdentifier(pid, pidtype)));
        final ScholixRelationship forward = new ScholixRelationship(relInfo.getOriginal(), "datacite",
            relInfo.getInverse());

        if ("doi".equalsIgnoreCase(pidtype)) {
            // For DOI targets neither the target's dnet identifier nor the link identifier is
            // generated here. NOTE(review): presumably a later step completes them — confirm.
            return Collections.singletonList(newLink(source, target, source, forward, updated));
        }

        target.setDnetIdentifier(generateId(pid, pidtype, "unknown"));
        target.setObjectType("unknown");
        target.setCollectedFrom(generateDataciteCollectedFrom("incomplete"));

        final Scholix direct = newLink(source, target, source, forward, updated);
        direct.generateIdentifier();

        final ScholixRelationship backward = new ScholixRelationship(relInfo.getInverse(), "datacite",
            relInfo.getOriginal());
        final Scholix inverse = newLink(target, source, source, backward, updated);
        inverse.generateIdentifier();

        final List<Scholix> result = new ArrayList<>();
        result.add(direct);
        result.add(inverse);
        return result;
    }

    /**
     * Assembles a link between two resources. Link provider and publisher are always taken from
     * {@code dataciteResource}, whichever end of the link it sits on.
     */
    private static Scholix newLink(
        final ScholixResource linkSource,
        final ScholixResource linkTarget,
        final ScholixResource dataciteResource,
        final ScholixRelationship relationship,
        final String updated) {
        final ScholixEntityId provider = dataciteResource.getCollectedFrom().get(0).getProvider();
        final Scholix link = new Scholix();
        link.setSource(linkSource);
        link.setTarget(linkTarget);
        link.setLinkprovider(Collections.singletonList(provider));
        link.setPublisher(dataciteResource.getPublisher());
        link.setRelationship(relationship);
        link.setPublicationDate(updated);
        return link;
    }

    /**
     * Builds the {@link ScholixResource} describing the Datacite record itself: DOI, object type,
     * dnet identifier, provenance and, when not blank, publisher, issued date and first title.
     *
     * @param dJson the Datacite record as a JSON string
     * @return the resource, marked as collected from Datacite with status {@code "complete"}
     */
    public ScholixResource generateDataciteScholixResource(String dJson) {
        final ScholixResource resource = new ScholixResource();

        final String doi = JsonPath.read(dJson, rootPath + ".doi");
        resource.setIdentifier(Collections.singletonList(new ScholixIdentifier(doi, "doi")));
        resource.setObjectType(getType(dJson));
        resource.setDnetIdentifier(generateId(doi, "doi", resource.getObjectType()));
        resource.setCollectedFrom(generateDataciteCollectedFrom("complete"));

        final String publisher = JsonPath.read(dJson, rootPath + ".publisher");
        if (StringUtils.isNotBlank(publisher)) {
            resource.setPublisher(Collections.singletonList(new ScholixEntityId(publisher, null)));
        }

        final String date = getDate(dJson);
        if (StringUtils.isNotBlank(date)) {
            resource.setPublicationDate(date);
        }

        final String title = getTitle(dJson);
        if (StringUtils.isNotBlank(title)) {
            resource.setTitle(title);
        }

        resource.setCreator(getCreators(dJson));
        return resource;
    }

    /** Returns one entity per creator name, or {@code null} when the record lists no creator. */
    private List<ScholixEntityId> getCreators(final String json) {
        final List<String> creatorNames = JsonPath.read(json, rootPath + ".creators[*].name");
        if (creatorNames == null || creatorNames.isEmpty()) {
            return null;
        }
        return creatorNames
            .stream()
            .map(name -> new ScholixEntityId(name, null))
            .collect(Collectors.toList());
    }

    /** Returns the first title of the record, or {@code null} when there is none. */
    private String getTitle(final String json) {
        final List<String> titles = JsonPath.read(json, rootPath + ".titles[*].title");
        return titles != null && !titles.isEmpty() ? titles.get(0) : null;
    }

    /**
     * Returns the {@code date} of the first entry whose {@code dateType} is "issued" (ignoring
     * case), or {@code null} when no such entry exists.
     */
    private String getDate(final String json) {
        final List<Map<String, String>> dates = JsonPath.read(json, rootPath + ".dates");
        if (dates == null) {
            return null;
        }
        for (final Map<String, String> entry : dates) {
            if ("issued".equalsIgnoreCase(entry.get("dateType"))) {
                return entry.get("date");
            }
        }
        return null;
    }

    /** Builds the single-element provenance list pointing at the Datacite datasource. */
    private List<ScholixCollectedFrom> generateDataciteCollectedFrom(final String completionStatus) {
        final ScholixEntityId datacite = new ScholixEntityId(
            "Datasets in Datacite",
            Collections.singletonList(new ScholixIdentifier("dli_________::datacite", "dnet_identifier")));
        return Collections.singletonList(new ScholixCollectedFrom(datacite, "collected", completionStatus));
    }

    /**
     * Maps the record's {@code types.bibtex} value to a Scholix object type: "publication" for
     * "article" (ignoring case), "dataset" for anything else or when the value cannot be read.
     */
    private String getType(final String json) {
        try {
            final String bibtex = JsonPath.read(json, rootPath + ".types.bibtex");
            return "article".equalsIgnoreCase(bibtex) ? "publication" : "dataset";
        } catch (RuntimeException ignored) {
            // An unreadable type falls back to the default; JVM Errors are not swallowed.
            return "dataset";
        }
    }

    /** Reads every element of the record's {@code relatedIdentifiers} array. */
    private List<Map<String, String>> getRelatedIdentifiers(final String json) {
        return JsonPath.read(json, rootPath + ".relatedIdentifiers[*]");
    }

    /**
     * Generates a dnet identifier: a type prefix followed by the MD5 of {@code "<pid>::<pidType>"},
     * both lower-cased and trimmed.
     *
     * @param pid the persistent identifier value
     * @param pidType the persistent identifier type (e.g. "doi")
     * @param entityType one of "publication" (prefix {@code 50|}), "dataset" ({@code 60|}) or
     *        "unknown" ({@code 70|})
     * @return the prefixed identifier
     * @throws IllegalArgumentException if {@code entityType} is none of the supported values
     */
    public static String generateId(final String pid, final String pidType, final String entityType) {
        final String prefix;
        switch (entityType) {
            case "publication":
                prefix = "50|";
                break;
            case "dataset":
                prefix = "60|";
                break;
            case "unknown":
                prefix = "70|";
                break;
            default:
                throw new IllegalArgumentException("unexpected value " + entityType);
        }
        // Locale.ROOT: the hash input must not vary with the JVM default locale, otherwise the
        // same pid could yield different identifiers on differently configured machines.
        final String normalizedPid = pid.toLowerCase(Locale.ROOT).trim();
        final String normalizedType = pidType.toLowerCase(Locale.ROOT).trim();
        return prefix + DHPUtils.md5(String.format("%s::%s", normalizedPid, normalizedType));
    }
}