orcid-no-doi #43
|
@ -1,420 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.doiboost.orcidnodoi.oaf;
|
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
import com.google.gson.JsonArray;
|
|
||||||
import com.google.gson.JsonElement;
|
|
||||||
import com.google.gson.JsonObject;
|
|
||||||
import eu.dnetlib.dhp.common.PacePerson;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
|
||||||
import eu.dnetlib.doiboost.orcidnodoi.SparkGenEnrichedOrcidWorks;
|
|
||||||
import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility;
|
|
||||||
import eu.dnetlib.doiboost.orcidnodoi.util.Pair;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import static eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility.*;
|
|
||||||
|
|
||||||
public class OrcidWorkToOAF {
|
|
||||||
|
|
||||||
static Logger logger = LoggerFactory.getLogger(OrcidWorkToOAF.class);
|
|
||||||
|
|
||||||
public static final String ORCID = "ORCID";
|
|
||||||
public final static String orcidPREFIX = "orcid_______";
|
|
||||||
public static final String OPENAIRE_PREFIX = "openaire____";
|
|
||||||
public static final String SEPARATOR = "::";
|
|
||||||
|
|
||||||
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {
|
|
||||||
|
|
||||||
{
|
|
||||||
put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
|
|
||||||
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname
|
|
||||||
private static Map<String, Pair<String, String>> externalIds = new HashMap<String, Pair<String, String>>() {
|
|
||||||
|
|
||||||
{
|
|
||||||
put("ark".toLowerCase(), new Pair<>("ark", "ark"));
|
|
||||||
put("arxiv".toLowerCase(), new Pair<>("arxiv", "arXiv"));
|
|
||||||
put("pmc".toLowerCase(), new Pair<>("pmc", "pmc"));
|
|
||||||
put("pmid".toLowerCase(), new Pair<>("pmid", "pmid"));
|
|
||||||
put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcidworkid"));
|
|
||||||
put("urn".toLowerCase(), new Pair<>("urn", "urn"));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
static Map<String, Map<String, String>> typologiesMapping;
|
|
||||||
|
|
||||||
static {
|
|
||||||
try {
|
|
||||||
final String tt = IOUtils.toString(OrcidWorkToOAF.class.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json"));
|
|
||||||
typologiesMapping = new Gson().fromJson(tt, Map.class);
|
|
||||||
} catch (final Exception e) {
|
|
||||||
logger.error("loading typologies", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static final String PID_TYPES = "dnet:pid_types";
|
|
||||||
|
|
||||||
public static Oaf generatePublicationActionsFromDump(final JsonObject rootElement, final String setName) {
|
|
||||||
|
|
||||||
if (!isValid(rootElement/*, context*/)) { return null; }
|
|
||||||
|
|
||||||
Publication publication = new Publication();
|
|
||||||
|
|
||||||
final DataInfo dataInfo = new DataInfo();
|
|
||||||
dataInfo.setDeletedbyinference(false);
|
|
||||||
dataInfo.setInferred(false);
|
|
||||||
dataInfo.setTrust("0.9");
|
|
||||||
dataInfo.setProvenanceaction(
|
|
||||||
mapQualifier(
|
|
||||||
"sysimport:actionset:orcidworks-no-doi",
|
|
||||||
"sysimport:actionset:orcidworks-no-doi",
|
|
||||||
"dnet:provenanceActions",
|
|
||||||
"dnet:provenanceActions"));
|
|
||||||
publication.setDataInfo(dataInfo);
|
|
||||||
|
|
||||||
publication.setLastupdatetimestamp(new Date().getTime());
|
|
||||||
|
|
||||||
publication.setDateofcollection("2019-10-22");
|
|
||||||
publication.setDateoftransformation(DumpToActionsUtility.now_ISO8601());
|
|
||||||
|
|
||||||
// Adding external ids
|
|
||||||
externalIds.keySet().stream()
|
|
||||||
.forEach(jsonExtId -> {
|
|
||||||
final String classid = externalIds.get(jsonExtId.toLowerCase()).getValue();
|
|
||||||
final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey();
|
|
||||||
final String extId = getStringValue(rootElement, jsonExtId);
|
|
||||||
if (StringUtils.isNotBlank(extId)) {
|
|
||||||
publication.getExternalReference().add(
|
|
||||||
convertExtRef(extId, classid, classname, "dnet:pid_types", "dnet:pid_types"));
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Adding source
|
|
||||||
// final String source = getStringValue(rootElement, "source");
|
|
||||||
// if (StringUtils.isNotBlank(source)) {
|
|
||||||
// metadata.addSource(StringField.newBuilder().setValue(source).build());
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Adding titles
|
|
||||||
final List<String> titles = createRepeatedField(rootElement, "titles");
|
|
||||||
if (titles==null || titles.isEmpty()) {
|
|
||||||
// context.incrementCounter("filtered", "title_not_found", 1);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
Qualifier q = mapQualifier("main title","main title","dnet:dataCite_title","dnet:dataCite_title");
|
|
||||||
publication.setTitle(
|
|
||||||
titles
|
|
||||||
.stream()
|
|
||||||
.map(t -> {
|
|
||||||
return mapStructuredProperty(t, q, null);
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
// Adding identifier
|
|
||||||
final String id = getStringValue(rootElement, "id");
|
|
||||||
String sourceId = null;
|
|
||||||
if (id != null) {
|
|
||||||
publication.setOriginalId(Arrays.asList(id));
|
|
||||||
sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(id.toLowerCase()));
|
|
||||||
} else {
|
|
||||||
String mergedTitle = titles.stream().map(Object::toString).collect(Collectors.joining(","));
|
|
||||||
sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(mergedTitle.toLowerCase()));
|
|
||||||
}
|
|
||||||
publication.setId(sourceId);
|
|
||||||
|
|
||||||
// Adding relevant date
|
|
||||||
settingRelevantDate(rootElement, publication, "publication_date", "issued", true);
|
|
||||||
|
|
||||||
// Adding collectedfrom
|
|
||||||
publication.setCollectedfrom(Arrays.asList(createCollectedFrom()));
|
|
||||||
|
|
||||||
// Adding type
|
|
||||||
final String type = getStringValue(rootElement, "type");
|
|
||||||
String cobjValue = "";
|
|
||||||
if (StringUtils.isNotBlank(type)) {
|
|
||||||
publication.setResourcetype(mapQualifier(type, type, "dnet:dataCite_resource", "dnet:dataCite_resource"));
|
|
||||||
|
|
||||||
final String typeValue = typologiesMapping.get(type).get("value");
|
|
||||||
cobjValue = typologiesMapping.get(type).get("cobj");
|
|
||||||
final Instance instance = new Instance();
|
|
||||||
|
|
||||||
// Adding hostedby
|
|
||||||
instance.setHostedby(createHostedBy());
|
|
||||||
|
|
||||||
// Adding url
|
|
||||||
final List<String> urls = createRepeatedField(rootElement, "urls");
|
|
||||||
if (urls!=null && !urls.isEmpty()) {
|
|
||||||
instance.setUrl(urls);
|
|
||||||
}
|
|
||||||
|
|
||||||
final String pubDate = getPublicationDate(rootElement, "publication_date");
|
|
||||||
if (StringUtils.isNotBlank(pubDate)) {
|
|
||||||
instance.setDateofacceptance(mapStringField(pubDate, null));
|
|
||||||
}
|
|
||||||
|
|
||||||
instance.setCollectedfrom(createCollectedFrom());
|
|
||||||
|
|
||||||
// Adding accessright
|
|
||||||
instance.setAccessright(mapQualifier("UNKNOWN", "UNKNOWN", "dnet:access_modes", "dnet:access_modes"));
|
|
||||||
|
|
||||||
// Adding type
|
|
||||||
instance.setInstancetype(mapQualifier(cobjValue, typeValue, "dnet:publication_resource", "dnet:publication_resource"));
|
|
||||||
|
|
||||||
publication.setInstance(Arrays.asList(instance));
|
|
||||||
} else {
|
|
||||||
// context.incrementCounter("filtered", "type_not_found", 1);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Adding authors
|
|
||||||
final List<Author> authors = createAuthors(rootElement);
|
|
||||||
if (authors != null && authors.size() > 0) {
|
|
||||||
publication.setAuthor(authors);
|
|
||||||
} else {
|
|
||||||
// context.incrementCounter("filtered", "author_not_found", 1);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
String classValue = getDefaultResulttype(cobjValue);
|
|
||||||
publication.setResulttype(mapQualifier(classValue, classValue,"dnet:result_typologies", "dnet:result_typologies"));
|
|
||||||
return publication;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static List<Author> createAuthors(final JsonObject root) {
|
|
||||||
|
|
||||||
final String authorsJSONFieldName = "authors";
|
|
||||||
|
|
||||||
if (root.has(authorsJSONFieldName) && root.get(authorsJSONFieldName).isJsonArray()) {
|
|
||||||
|
|
||||||
final List<Author> authors = new ArrayList<>();
|
|
||||||
final JsonArray jsonAuthors = root.getAsJsonArray(authorsJSONFieldName);
|
|
||||||
int firstCounter = 0;
|
|
||||||
int defaultCounter = 0;
|
|
||||||
int rank = 1;
|
|
||||||
int currentRank = 0;
|
|
||||||
|
|
||||||
for (final JsonElement item : jsonAuthors) {
|
|
||||||
final JsonObject jsonAuthor = item.getAsJsonObject();
|
|
||||||
final Author author = new Author();
|
|
||||||
if (item.isJsonObject()) {
|
|
||||||
final String surname = getStringValue(jsonAuthor, "surname");
|
|
||||||
final String name = getStringValue(jsonAuthor, "name");
|
|
||||||
final String oid = getStringValue(jsonAuthor, "oid");
|
|
||||||
final String seq = getStringValue(jsonAuthor, "seq");
|
|
||||||
if (StringUtils.isNotBlank(seq)) {
|
|
||||||
if (seq.equals("first")) {
|
|
||||||
firstCounter += 1;
|
|
||||||
rank = firstCounter;
|
|
||||||
|
|
||||||
} else if (seq.equals("additional")) {
|
|
||||||
rank = currentRank + 1;
|
|
||||||
} else {
|
|
||||||
defaultCounter += 1;
|
|
||||||
rank = defaultCounter;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (StringUtils.isNotBlank(oid)) {
|
|
||||||
author.setPid(Arrays.asList(mapAuthorId(oid)));
|
|
||||||
author.setFullname(name + " " + surname);
|
|
||||||
if (StringUtils.isNotBlank(name)) {
|
|
||||||
author.setName(name);
|
|
||||||
}
|
|
||||||
if (StringUtils.isNotBlank(surname)) {
|
|
||||||
author.setSurname(surname);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
String fullname = "";
|
|
||||||
if (StringUtils.isNotBlank(name)) {
|
|
||||||
fullname = name;
|
|
||||||
} else {
|
|
||||||
if (StringUtils.isNotBlank(surname)) {
|
|
||||||
fullname = surname;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PacePerson p = new PacePerson(fullname, false);
|
|
||||||
if (p.isAccurate()) {
|
|
||||||
author.setName(p.getNormalisedFirstName());
|
|
||||||
author.setSurname(p.getNormalisedSurname());
|
|
||||||
author.setFullname(p.getNormalisedFullname());
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
author.setFullname(fullname);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
author.setRank(rank);
|
|
||||||
authors.add(author);
|
|
||||||
currentRank = rank;
|
|
||||||
}
|
|
||||||
return authors;
|
|
||||||
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static List<String> createRepeatedField(final JsonObject rootElement, final String fieldName) {
|
|
||||||
if (!rootElement.has(fieldName)) { return null; }
|
|
||||||
if (rootElement.has(fieldName) && rootElement.get(fieldName).isJsonNull()) { return null; }
|
|
||||||
if (rootElement.get(fieldName).isJsonArray()) {
|
|
||||||
if (!isValidJsonArray(rootElement, fieldName)) { return null; }
|
|
||||||
return getArrayValues(rootElement, fieldName);
|
|
||||||
} else {
|
|
||||||
String field = getStringValue(rootElement, fieldName);
|
|
||||||
return Arrays.asList(cleanField(field));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String cleanField(String value) {
|
|
||||||
if (value != null && !value.isEmpty() && value.charAt(0) == '"' && value.charAt(value.length() - 1) == '"') {
|
|
||||||
value = value.substring(1, value.length() - 1);
|
|
||||||
}
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void settingRelevantDate(final JsonObject rootElement,
|
|
||||||
final Publication publication,
|
|
||||||
final String jsonKey,
|
|
||||||
final String dictionaryKey,
|
|
||||||
final boolean addToDateOfAcceptance) {
|
|
||||||
|
|
||||||
final String pubDate = getPublicationDate(rootElement, "publication_date");
|
|
||||||
if (StringUtils.isNotBlank(pubDate)) {
|
|
||||||
if (addToDateOfAcceptance) {
|
|
||||||
publication.setDateofacceptance(mapStringField(pubDate, null));
|
|
||||||
}
|
|
||||||
Qualifier q = mapQualifier(dictionaryKey,dictionaryKey,"dnet:dataCite_date","dnet:dataCite_date");
|
|
||||||
publication.setRelevantdate(
|
|
||||||
Arrays.asList(pubDate)
|
|
||||||
.stream()
|
|
||||||
.map(r -> {
|
|
||||||
return mapStructuredProperty(r, q, null);
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String getPublicationDate(final JsonObject rootElement,
|
|
||||||
final String jsonKey) {
|
|
||||||
|
|
||||||
final JsonObject pubDateJson = rootElement.getAsJsonObject(jsonKey);
|
|
||||||
if (pubDateJson == null) { return null; }
|
|
||||||
final String year = getStringValue(pubDateJson, "year");
|
|
||||||
final String month = getStringValue(pubDateJson, "month");
|
|
||||||
final String day = getStringValue(pubDateJson, "day");
|
|
||||||
|
|
||||||
if (StringUtils.isBlank(year)) { return null; }
|
|
||||||
String pubDate = "".concat(year);
|
|
||||||
if (StringUtils.isNotBlank(month)) {
|
|
||||||
pubDate = pubDate.concat("-" + month);
|
|
||||||
if (StringUtils.isNotBlank(day)) {
|
|
||||||
pubDate = pubDate.concat("-" + day);
|
|
||||||
} else {
|
|
||||||
pubDate += "-01";
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
pubDate += "-01-01";
|
|
||||||
}
|
|
||||||
if (isValidDate(pubDate)) { return pubDate; }
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static boolean isValid(final JsonObject rootElement/*, final Reporter context*/) {
|
|
||||||
|
|
||||||
final String type = getStringValue(rootElement, "type");
|
|
||||||
if (!typologiesMapping.containsKey(type)) {
|
|
||||||
// context.incrementCounter("filtered", "unknowntype_" + type, 1);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!isValidJsonArray(rootElement, "titles")) {
|
|
||||||
// context.incrementCounter("filtered", "invalid_title", 1);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) {
|
|
||||||
if (!rootElement.has(fieldName)) { return false; }
|
|
||||||
final JsonElement jsonElement = rootElement.get(fieldName);
|
|
||||||
if (jsonElement.isJsonNull()) { return false; }
|
|
||||||
if (jsonElement.isJsonArray()) {
|
|
||||||
final JsonArray jsonArray = jsonElement.getAsJsonArray();
|
|
||||||
if (jsonArray.isJsonNull()) { return false; }
|
|
||||||
if (jsonArray.get(0).isJsonNull()) { return false; }
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Qualifier mapQualifier(String classId, String className, String schemeId, String schemeName) {
|
|
||||||
final Qualifier qualifier = new Qualifier();
|
|
||||||
qualifier.setClassid(classId);
|
|
||||||
qualifier.setClassname(className);
|
|
||||||
qualifier.setSchemeid(schemeId);
|
|
||||||
qualifier.setSchemename(schemeName);
|
|
||||||
return qualifier;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static ExternalReference convertExtRef(String extId, String classId, String className, String schemeId, String schemeName) {
|
|
||||||
ExternalReference ex = new ExternalReference();
|
|
||||||
ex.setRefidentifier(extId);
|
|
||||||
ex.setQualifier(mapQualifier(classId, className, schemeId, schemeName ));
|
|
||||||
return ex;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) {
|
|
||||||
if (value == null | StringUtils.isBlank(value)) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
final StructuredProperty structuredProperty = new StructuredProperty();
|
|
||||||
structuredProperty.setValue(value);
|
|
||||||
structuredProperty.setQualifier(qualifier);
|
|
||||||
structuredProperty.setDataInfo(dataInfo);
|
|
||||||
return structuredProperty;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Field<String> mapStringField(String value, DataInfo dataInfo) {
|
|
||||||
if (value == null || StringUtils.isBlank(value)) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
final Field<String> stringField = new Field<>();
|
|
||||||
stringField.setValue(value);
|
|
||||||
stringField.setDataInfo(dataInfo);
|
|
||||||
return stringField;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static KeyValue createCollectedFrom() {
|
|
||||||
KeyValue cf = new KeyValue();
|
|
||||||
cf.setValue(ORCID);
|
|
||||||
cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a");
|
|
||||||
return cf;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static KeyValue createHostedBy() {
|
|
||||||
KeyValue hb = new KeyValue();
|
|
||||||
hb.setValue("Unknown Repository");
|
|
||||||
hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c");
|
|
||||||
return hb;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static StructuredProperty mapAuthorId(String orcidId) {
|
|
||||||
final StructuredProperty sp = new StructuredProperty();
|
|
||||||
sp.setValue(orcidId);
|
|
||||||
final Qualifier q = new Qualifier();
|
|
||||||
q.setClassid("ORCID");
|
|
||||||
q.setClassname("ORCID");
|
|
||||||
sp.setQualifier(q);
|
|
||||||
return sp;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1,456 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.doiboost.orcidnodoi.oaf;
|
||||||
|
|
||||||
|
import static eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility.*;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.gson.JsonArray;
|
||||||
|
import com.google.gson.JsonElement;
|
||||||
|
import com.google.gson.JsonObject;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.PacePerson;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility;
|
||||||
|
import eu.dnetlib.doiboost.orcidnodoi.util.Pair;
|
||||||
|
|
||||||
|
public class PublicationToOaf {
|
||||||
|
|
||||||
|
static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class);
|
||||||
|
|
||||||
|
public static final String ORCID = "ORCID";
|
||||||
|
public final static String orcidPREFIX = "orcid_______";
|
||||||
|
public static final String OPENAIRE_PREFIX = "openaire____";
|
||||||
|
public static final String SEPARATOR = "::";
|
||||||
|
|
||||||
|
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {
|
||||||
|
|
||||||
|
{
|
||||||
|
put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname
|
||||||
|
private static Map<String, Pair<String, String>> externalIds = new HashMap<String, Pair<String, String>>() {
|
||||||
|
|
||||||
|
{
|
||||||
|
put("ark".toLowerCase(), new Pair<>("ark", "ark"));
|
||||||
|
put("arxiv".toLowerCase(), new Pair<>("arxiv", "arXiv"));
|
||||||
|
put("pmc".toLowerCase(), new Pair<>("pmc", "pmc"));
|
||||||
|
put("pmid".toLowerCase(), new Pair<>("pmid", "pmid"));
|
||||||
|
put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcidworkid"));
|
||||||
|
put("urn".toLowerCase(), new Pair<>("urn", "urn"));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
	// Maps the json "type" value to its OAF typology entry ("value" and "cobj" keys),
	// loaded once from the bundled typologies.json resource.
	// NOTE(review): stays null if loading fails, and isValid()/the dump conversion would then NPE — confirm intended.
	static Map<String, Map<String, String>> typologiesMapping;

	static {
		try {
			// NOTE(review): resource is read with the platform default charset — confirm UTF-8 is intended.
			final String tt = IOUtils
				.toString(
					PublicationToOaf.class
						.getResourceAsStream(
							"/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json"));
			typologiesMapping = new Gson().fromJson(tt, Map.class);
		} catch (final Exception e) {
			// Failure is only logged; see the NOTE above about the map remaining null.
			logger.error("loading typologies", e);
		}
	}
|
||||||
|
|
||||||
|
public static final String PID_TYPES = "dnet:pid_types";
|
||||||
|
|
||||||
|
	/**
	 * Converts a json record from the ORCID no-doi dump into an OAF {@link Publication}.
	 *
	 * Returns {@code null} whenever the record is filtered out: invalid (see {@link #isValid}),
	 * no titles, no mappable type, or no authors. Per the review thread, callers filter the
	 * null results, e.g. {@code .filter(p -> p != null)} on the resulting RDD.
	 *
	 * @param rootElement the json publication as parsed by Gson
	 * @return the mapped Publication, or null when the record must be discarded
	 */
	public static Oaf generatePublicationActionsFromDump(final JsonObject rootElement) {

		logger.debug("generatePublicationActionsFromDump ...");
		if (!isValid(rootElement/* , context */)) {
			logger.error("publication not valid");
			return null;
		}

		Publication publication = new Publication();

		// Fixed provenance: records produced by the orcidworks-no-doi actionset import.
		final DataInfo dataInfo = new DataInfo();
		dataInfo.setDeletedbyinference(false);
		dataInfo.setInferred(false);
		dataInfo.setTrust("0.9");
		dataInfo
			.setProvenanceaction(
				mapQualifier(
					"sysimport:actionset:orcidworks-no-doi",
					"sysimport:actionset:orcidworks-no-doi",
					"dnet:provenanceActions",
					"dnet:provenanceActions"));
		publication.setDataInfo(dataInfo);

		publication.setLastupdatetimestamp(new Date().getTime());

		// NOTE(review): collection date is hard-coded — confirm this is intentional.
		publication.setDateofcollection("2019-10-22");
		publication.setDateoftransformation(DumpToActionsUtility.now_ISO8601());

		// Adding external ids: every known external-id key present and non-blank in the
		// json becomes an ExternalReference with the mapped classid/classname pair.
		externalIds
			.keySet()
			.stream()
			.forEach(jsonExtId -> {
				final String classid = externalIds.get(jsonExtId.toLowerCase()).getValue();
				final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey();
				final String extId = getStringValue(rootElement, jsonExtId);
				if (StringUtils.isNotBlank(extId)) {
					publication
						.getExternalReference()
						.add(
							convertExtRef(extId, classid, classname, "dnet:pid_types", "dnet:pid_types"));
				}
			});

		// Adding source
		final String source = getStringValue(rootElement, "sourceName");
		if (StringUtils.isNotBlank(source)) {
			publication.setSource(Arrays.asList(mapStringField(source, null)));
		}

		// Adding titles; a record without titles is filtered out (callers expect the null).
		final List<String> titles = createRepeatedField(rootElement, "titles");
		if (titles == null || titles.isEmpty()) {
			logger.error("titles not found");
			// context.incrementCounter("filtered", "title_not_found", 1);
			return null;
		}
		Qualifier q = mapQualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
		publication
			.setTitle(
				titles
					.stream()
					.map(t -> {
						return mapStructuredProperty(t, q, null);
					})
					.collect(Collectors.toList()));
		// Adding identifier: prefer the md5 of the json "id"; otherwise fall back to the
		// md5 of the comma-joined titles so the record still gets a deterministic id.
		final String id = getStringValue(rootElement, "id");
		String sourceId = null;
		if (id != null) {
			publication.setOriginalId(Arrays.asList(id));
			sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(id.toLowerCase()));
		} else {
			String mergedTitle = titles.stream().map(Object::toString).collect(Collectors.joining(","));
			sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(mergedTitle.toLowerCase()));
		}
		publication.setId(sourceId);

		// Adding relevant date
		settingRelevantDate(rootElement, publication, "publication_date", "issued", true);

		// Adding collectedfrom
		publication.setCollectedfrom(Arrays.asList(createCollectedFrom()));

		// Adding type: isValid() already guaranteed the type is in typologiesMapping,
		// so the get(type) lookups below cannot return null here.
		final String type = getStringValue(rootElement, "type");
		String cobjValue = "";
		if (StringUtils.isNotBlank(type)) {
			publication.setResourcetype(mapQualifier(type, type, "dnet:dataCite_resource", "dnet:dataCite_resource"));

			final String typeValue = typologiesMapping.get(type).get("value");
			cobjValue = typologiesMapping.get(type).get("cobj");
			final Instance instance = new Instance();

			// Adding hostedby
			instance.setHostedby(createHostedBy());

			// Adding url
			final List<String> urls = createRepeatedField(rootElement, "urls");
			if (urls != null && !urls.isEmpty()) {
				instance.setUrl(urls);
			}

			final String pubDate = getPublicationDate(rootElement, "publication_date");
			if (StringUtils.isNotBlank(pubDate)) {
				instance.setDateofacceptance(mapStringField(pubDate, null));
			}

			instance.setCollectedfrom(createCollectedFrom());

			// Adding accessright
			instance.setAccessright(mapQualifier("UNKNOWN", "UNKNOWN", "dnet:access_modes", "dnet:access_modes"));

			// Adding type
			instance
				.setInstancetype(
					mapQualifier(cobjValue, typeValue, "dnet:publication_resource", "dnet:publication_resource"));

			publication.setInstance(Arrays.asList(instance));
		} else {
			logger.error("type not found");
			// context.incrementCounter("filtered", "type_not_found", 1);
			return null;
		}

		// Adding authors; a record without authors is filtered out (callers expect the null).
		final List<Author> authors = createAuthors(rootElement);
		if (authors != null && authors.size() > 0) {
			publication.setAuthor(authors);
		} else {
			logger.error("authors not found");
			// context.incrementCounter("filtered", "author_not_found", 1);
			return null;
		}
		String classValue = getDefaultResulttype(cobjValue);
		publication
			.setResulttype(mapQualifier(classValue, classValue, "dnet:result_typologies", "dnet:result_typologies"));
		return publication;
	}
|
||||||
|
|
||||||
|
public static List<Author> createAuthors(final JsonObject root) {
|
||||||
|
|
||||||
|
final String authorsJSONFieldName = "contributors";
|
||||||
|
|
||||||
|
if (root.has(authorsJSONFieldName) && root.get(authorsJSONFieldName).isJsonArray()) {
|
||||||
|
|
||||||
|
final List<Author> authors = new ArrayList<>();
|
||||||
|
final JsonArray jsonAuthors = root.getAsJsonArray(authorsJSONFieldName);
|
||||||
|
int firstCounter = 0;
|
||||||
|
int defaultCounter = 0;
|
||||||
|
int rank = 1;
|
||||||
|
int currentRank = 0;
|
||||||
|
|
||||||
|
for (final JsonElement item : jsonAuthors) {
|
||||||
|
final JsonObject jsonAuthor = item.getAsJsonObject();
|
||||||
|
final Author author = new Author();
|
||||||
|
if (item.isJsonObject()) {
|
||||||
|
final String creditname = getStringValue(jsonAuthor, "creditName");
|
||||||
|
final String surname = getStringValue(jsonAuthor, "surname");
|
||||||
|
final String name = getStringValue(jsonAuthor, "name");
|
||||||
|
final String oid = getStringValue(jsonAuthor, "oid");
|
||||||
|
final String seq = getStringValue(jsonAuthor, "sequence");
|
||||||
|
if (StringUtils.isNotBlank(seq)) {
|
||||||
|
if (seq.equals("first")) {
|
||||||
|
firstCounter += 1;
|
||||||
|
rank = firstCounter;
|
||||||
|
|
||||||
|
} else if (seq.equals("additional")) {
|
||||||
|
rank = currentRank + 1;
|
||||||
|
} else {
|
||||||
|
defaultCounter += 1;
|
||||||
|
rank = defaultCounter;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (StringUtils.isNotBlank(oid)) {
|
||||||
|
author.setPid(Arrays.asList(mapAuthorId(oid)));
|
||||||
|
author.setFullname(name + " " + surname);
|
||||||
claudio.atzori
commented
Is the caller expecting the Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
enrico.ottonello
commented
yes, there is a filter on null value: yes, there is a filter on null value:
JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);
|
|||||||
|
if (StringUtils.isNotBlank(name)) {
|
||||||
|
author.setName(name);
|
||||||
|
}
|
||||||
|
if (StringUtils.isNotBlank(surname)) {
|
||||||
|
author.setSurname(surname);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
PacePerson p = new PacePerson(creditname, false);
|
||||||
|
if (p.isAccurate()) {
|
||||||
|
author.setName(p.getNormalisedFirstName());
|
||||||
|
author.setSurname(p.getNormalisedSurname());
|
||||||
claudio.atzori
commented
Is the caller expecting the Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
enrico.ottonello
commented
yes, there is a filter on null value: JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null); yes, there is a filter on null value: JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);
|
|||||||
|
author.setFullname(p.getNormalisedFullname());
|
||||||
|
} else {
|
||||||
|
author.setFullname(creditname);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
author.setRank(rank);
|
||||||
|
authors.add(author);
|
||||||
|
currentRank = rank;
|
||||||
|
}
|
||||||
|
return authors;
|
||||||
|
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> createRepeatedField(final JsonObject rootElement, final String fieldName) {
|
||||||
|
if (!rootElement.has(fieldName)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (rootElement.has(fieldName) && rootElement.get(fieldName).isJsonNull()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (rootElement.get(fieldName).isJsonArray()) {
|
||||||
|
if (!isValidJsonArray(rootElement, fieldName)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return getArrayValues(rootElement, fieldName);
|
||||||
|
} else {
|
||||||
|
String field = getStringValue(rootElement, fieldName);
|
||||||
|
return Arrays.asList(cleanField(field));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String cleanField(String value) {
|
||||||
|
if (value != null && !value.isEmpty() && value.charAt(0) == '"' && value.charAt(value.length() - 1) == '"') {
|
||||||
|
value = value.substring(1, value.length() - 1);
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void settingRelevantDate(final JsonObject rootElement,
|
||||||
|
final Publication publication,
|
||||||
|
final String jsonKey,
|
||||||
|
final String dictionaryKey,
|
||||||
|
final boolean addToDateOfAcceptance) {
|
||||||
|
|
||||||
|
final String pubDate = getPublicationDate(rootElement, "publication_date");
|
||||||
|
if (StringUtils.isNotBlank(pubDate)) {
|
||||||
|
if (addToDateOfAcceptance) {
|
||||||
|
publication.setDateofacceptance(mapStringField(pubDate, null));
|
||||||
|
}
|
||||||
|
Qualifier q = mapQualifier(dictionaryKey, dictionaryKey, "dnet:dataCite_date", "dnet:dataCite_date");
|
||||||
|
publication
|
||||||
|
.setRelevantdate(
|
||||||
|
Arrays
|
||||||
|
.asList(pubDate)
|
||||||
|
.stream()
|
||||||
|
.map(r -> {
|
||||||
|
return mapStructuredProperty(r, q, null);
|
||||||
|
})
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String getPublicationDate(final JsonObject rootElement,
|
||||||
|
final String jsonKey) {
|
||||||
|
|
||||||
|
final JsonObject pubDateJson = rootElement.getAsJsonObject(jsonKey);
|
||||||
|
if (pubDateJson == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
claudio.atzori
commented
Is the caller expecting the Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
enrico.ottonello
commented
yes, there is a check on null value yes, there is a check on null value
|
|||||||
|
final String year = getStringValue(pubDateJson, "year");
|
||||||
|
final String month = getStringValue(pubDateJson, "month");
|
||||||
|
final String day = getStringValue(pubDateJson, "day");
|
||||||
|
|
||||||
|
if (StringUtils.isBlank(year)) {
|
||||||
claudio.atzori
commented
Is the caller expecting the Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
enrico.ottonello
commented
yes, there is a check on null value yes, there is a check on null value
|
|||||||
|
return null;
|
||||||
|
}
|
||||||
|
String pubDate = "".concat(year);
|
||||||
claudio.atzori
commented
Is the caller expecting the Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
enrico.ottonello
commented
yes, there is a check on null value yes, there is a check on null value
|
|||||||
|
if (StringUtils.isNotBlank(month)) {
|
||||||
|
pubDate = pubDate.concat("-" + month);
|
||||||
|
if (StringUtils.isNotBlank(day)) {
|
||||||
|
pubDate = pubDate.concat("-" + day);
|
||||||
claudio.atzori
commented
Is the caller expecting the Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
enrico.ottonello
commented
yes, there is a check on null value yes, there is a check on null value
|
|||||||
|
} else {
|
||||||
|
pubDate += "-01";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pubDate += "-01-01";
|
||||||
|
}
|
||||||
|
if (isValidDate(pubDate)) {
|
||||||
|
return pubDate;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static boolean isValid(final JsonObject rootElement/* , final Reporter context */) {
|
||||||
|
|
||||||
|
final String type = getStringValue(rootElement, "type");
|
||||||
|
if (!typologiesMapping.containsKey(type)) {
|
||||||
|
logger.error("unknowntype_" + type);
|
||||||
|
// context.incrementCounter("filtered", "unknowntype_" + type, 1);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isValidJsonArray(rootElement, "titles")) {
|
||||||
|
logger.error("invalid_title");
|
||||||
|
// context.incrementCounter("filtered", "invalid_title", 1);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) {
|
||||||
|
if (!rootElement.has(fieldName)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
final JsonElement jsonElement = rootElement.get(fieldName);
|
||||||
|
if (jsonElement.isJsonNull()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (jsonElement.isJsonArray()) {
|
||||||
|
final JsonArray jsonArray = jsonElement.getAsJsonArray();
|
||||||
|
if (jsonArray.isJsonNull()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (jsonArray.get(0).isJsonNull()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Qualifier mapQualifier(String classId, String className, String schemeId, String schemeName) {
|
||||||
|
final Qualifier qualifier = new Qualifier();
|
||||||
|
qualifier.setClassid(classId);
|
||||||
|
qualifier.setClassname(className);
|
||||||
|
qualifier.setSchemeid(schemeId);
|
||||||
|
qualifier.setSchemename(schemeName);
|
||||||
|
return qualifier;
|
||||||
|
}
|
||||||
claudio.atzori
commented
Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
enrico.ottonello
commented
yes, there is this check on the value: StringUtils.isNotBlank
|
|||||||
|
|
||||||
|
private static ExternalReference convertExtRef(String extId, String classId, String className, String schemeId,
|
||||||
|
String schemeName) {
|
||||||
|
ExternalReference ex = new ExternalReference();
|
||||||
|
ex.setRefidentifier(extId);
|
||||||
|
ex.setQualifier(mapQualifier(classId, className, schemeId, schemeName));
|
||||||
|
return ex;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) {
|
||||||
|
if (value == null | StringUtils.isBlank(value)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
final StructuredProperty structuredProperty = new StructuredProperty();
|
||||||
|
structuredProperty.setValue(value);
|
||||||
claudio.atzori
commented
Is the caller expecting the Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
enrico.ottonello
commented
yes, there is this ckeck on the value: StringUtils.isNotBlank yes, there is this ckeck on the value: StringUtils.isNotBlank
|
|||||||
|
structuredProperty.setQualifier(qualifier);
|
||||||
|
structuredProperty.setDataInfo(dataInfo);
|
||||||
|
return structuredProperty;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Field<String> mapStringField(String value, DataInfo dataInfo) {
|
||||||
|
if (value == null || StringUtils.isBlank(value)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
final Field<String> stringField = new Field<>();
|
||||||
|
stringField.setValue(value);
|
||||||
|
stringField.setDataInfo(dataInfo);
|
||||||
|
return stringField;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static KeyValue createCollectedFrom() {
|
||||||
|
KeyValue cf = new KeyValue();
|
||||||
|
cf.setValue(ORCID);
|
||||||
|
cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a");
|
||||||
|
return cf;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static KeyValue createHostedBy() {
|
||||||
|
KeyValue hb = new KeyValue();
|
||||||
|
hb.setValue("Unknown Repository");
|
||||||
|
hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c");
|
||||||
|
return hb;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static StructuredProperty mapAuthorId(String orcidId) {
|
||||||
|
final StructuredProperty sp = new StructuredProperty();
|
||||||
|
sp.setValue(orcidId);
|
||||||
|
final Qualifier q = new Qualifier();
|
||||||
|
q.setClassid("ORCID");
|
||||||
|
q.setClassname("ORCID");
|
||||||
|
sp.setQualifier(q);
|
||||||
|
return sp;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,107 +1,109 @@
|
||||||
package eu.dnetlib.doiboost.orcidnodoi.util;
|
|
||||||
|
|
||||||
import com.google.gson.JsonArray;
|
package eu.dnetlib.doiboost.orcidnodoi.util;
|
||||||
import com.google.gson.JsonObject;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import com.google.gson.JsonArray;
|
||||||
|
import com.google.gson.JsonObject;
|
||||||
|
|
||||||
public class DumpToActionsUtility {
|
public class DumpToActionsUtility {
|
||||||
|
|
||||||
private static final SimpleDateFormat ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US);
|
private static final SimpleDateFormat ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US);
|
||||||
|
|
||||||
public static String getStringValue(final JsonObject root, final String key) {
|
public static String getStringValue(final JsonObject root, final String key) {
|
||||||
if (root.has(key) && !root.get(key).isJsonNull())
|
if (root.has(key) && !root.get(key).isJsonNull())
|
||||||
return root.get(key).getAsString();
|
return root.get(key).getAsString();
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<String> getArrayValues(final JsonObject root, final String key) {
|
public static List<String> getArrayValues(final JsonObject root, final String key) {
|
||||||
if (root.has(key) && root.get(key).isJsonArray()) {
|
if (root.has(key) && root.get(key).isJsonArray()) {
|
||||||
final JsonArray asJsonArray = root.get(key).getAsJsonArray();
|
final JsonArray asJsonArray = root.get(key).getAsJsonArray();
|
||||||
final List<String> result = new ArrayList<>();
|
final List<String> result = new ArrayList<>();
|
||||||
|
|
||||||
|
asJsonArray.forEach(it -> {
|
||||||
|
if (StringUtils.isNotBlank(it.getAsString())) {
|
||||||
|
result.add(it.getAsString());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
return new ArrayList<>();
|
||||||
|
}
|
||||||
|
|
||||||
asJsonArray.forEach(it -> {
|
public static List<JsonObject> getArrayObjects(final JsonObject root, final String key) {
|
||||||
if (StringUtils.isNotBlank(it.getAsString())) {
|
if (root.has(key) && root.get(key).isJsonArray()) {
|
||||||
result.add(it.getAsString());
|
final JsonArray asJsonArray = root.get(key).getAsJsonArray();
|
||||||
}
|
final List<JsonObject> result = new ArrayList<>();
|
||||||
});
|
asJsonArray.forEach(it -> {
|
||||||
return result;
|
if (it.getAsJsonObject() != null) {
|
||||||
}
|
result.add(it.getAsJsonObject());
|
||||||
return new ArrayList<>();
|
}
|
||||||
}
|
});
|
||||||
public static List<JsonObject> getArrayObjects(final JsonObject root, final String key) {
|
return result;
|
||||||
if (root.has(key) && root.get(key).isJsonArray()) {
|
}
|
||||||
final JsonArray asJsonArray = root.get(key).getAsJsonArray();
|
return new ArrayList<>();
|
||||||
final List<JsonObject> result = new ArrayList<>();
|
}
|
||||||
asJsonArray.forEach(it -> {
|
|
||||||
if (it.getAsJsonObject() != null) {
|
|
||||||
result.add(it.getAsJsonObject());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
return new ArrayList<>();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isValidDate(final String date) {
|
public static boolean isValidDate(final String date) {
|
||||||
return date.matches("\\d{4}-\\d{2}-\\d{2}");
|
return date.matches("\\d{4}-\\d{2}-\\d{2}");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String now_ISO8601() { // NOPMD
|
public static String now_ISO8601() { // NOPMD
|
||||||
String result;
|
String result;
|
||||||
synchronized (ISO8601FORMAT) {
|
synchronized (ISO8601FORMAT) {
|
||||||
result = ISO8601FORMAT.format(new Date());
|
result = ISO8601FORMAT.format(new Date());
|
||||||
}
|
}
|
||||||
//convert YYYYMMDDTHH:mm:ss+HH00 into YYYYMMDDTHH:mm:ss+HH:00
|
// convert YYYYMMDDTHH:mm:ss+HH00 into YYYYMMDDTHH:mm:ss+HH:00
|
||||||
//- note the added colon for the Timezone
|
// - note the added colon for the Timezone
|
||||||
return result.substring(0, result.length() - 2) + ":" + result.substring(result.length() - 2);
|
return result.substring(0, result.length() - 2) + ":" + result.substring(result.length() - 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getDefaultResulttype(final String cobjcategory) {
|
public static String getDefaultResulttype(final String cobjcategory) {
|
||||||
switch (cobjcategory) {
|
switch (cobjcategory) {
|
||||||
case "0029":
|
case "0029":
|
||||||
return "software";
|
return "software";
|
||||||
case "0021":
|
case "0021":
|
||||||
case "0024":
|
case "0024":
|
||||||
case "0025":
|
case "0025":
|
||||||
case "0030":
|
case "0030":
|
||||||
return "dataset";
|
return "dataset";
|
||||||
case "0000":
|
case "0000":
|
||||||
case "0010":
|
case "0010":
|
||||||
case "0018":
|
case "0018":
|
||||||
case "0020":
|
case "0020":
|
||||||
case "0022":
|
case "0022":
|
||||||
case "0023":
|
case "0023":
|
||||||
case "0026":
|
case "0026":
|
||||||
case "0027":
|
case "0027":
|
||||||
case "0028":
|
case "0028":
|
||||||
case "0037":
|
case "0037":
|
||||||
return "other";
|
return "other";
|
||||||
case "0001":
|
case "0001":
|
||||||
case "0002":
|
case "0002":
|
||||||
case "0004":
|
case "0004":
|
||||||
case "0005":
|
case "0005":
|
||||||
case "0006":
|
case "0006":
|
||||||
case "0007":
|
case "0007":
|
||||||
case "0008":
|
case "0008":
|
||||||
case "0009":
|
case "0009":
|
||||||
case "0011":
|
case "0011":
|
||||||
case "0012":
|
case "0012":
|
||||||
case "0013":
|
case "0013":
|
||||||
case "0014":
|
case "0014":
|
||||||
case "0015":
|
case "0015":
|
||||||
case "0016":
|
case "0016":
|
||||||
case "0017":
|
case "0017":
|
||||||
case "0019":
|
case "0019":
|
||||||
case "0031":
|
case "0031":
|
||||||
case "0032":
|
case "0032":
|
||||||
return "publication";
|
return "publication";
|
||||||
default:
|
default:
|
||||||
return "publication";
|
return "publication";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,30 +1,32 @@
|
||||||
|
|
||||||
package eu.dnetlib.doiboost.orcidnodoi.util;
|
package eu.dnetlib.doiboost.orcidnodoi.util;
|
||||||
|
|
||||||
/**
 * Simple immutable generic pair of a key and a value.
 *
 * @param <K> the key type
 * @param <V> the value type
 */
public class Pair<K, V> {

	// components are never reassigned after construction
	private final K k;

	private final V v;

	public Pair(K k, V v) {
		this.k = k;
		this.v = v;
	}

	public K getKey() {
		return k;
	}

	public V getValue() {
		return v;
	}

	@Override
	public boolean equals(Object obj) {
		if (obj instanceof Pair<?, ?>) {
			Pair<?, ?> tmp = (Pair<?, ?>) obj;
			// BUGFIX: Objects.equals avoids the NPE the original threw on null components
			return Objects.equals(k, tmp.getKey()) && Objects.equals(v, tmp.getValue());
		}
		return false;
	}

	// BUGFIX: equals was overridden without hashCode, breaking the hashCode contract
	// (equal pairs could land in different hash buckets)
	@Override
	public int hashCode() {
		return Objects.hash(k, v);
	}

}
|
||||||
|
|
|
@ -54,7 +54,7 @@ public class OrcidClientTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Test
|
// @Test
|
||||||
public void testLambdaFileParser() throws Exception {
|
private void testLambdaFileParser() throws Exception {
|
||||||
try (BufferedReader br = new BufferedReader(
|
try (BufferedReader br = new BufferedReader(
|
||||||
new InputStreamReader(this.getClass().getResourceAsStream("last_modified.csv")))) {
|
new InputStreamReader(this.getClass().getResourceAsStream("last_modified.csv")))) {
|
||||||
String line;
|
String line;
|
||||||
|
|
|
@ -0,0 +1,76 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.doiboost.orcidnodoi;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.gson.JsonElement;
|
||||||
|
import com.google.gson.JsonParser;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf;
|
||||||
|
|
||||||
|
public class PublicationToOafTest {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// @Ignore
|
||||||
|
public void convertOafPublicationTest() throws Exception {
|
||||||
|
String jsonPublication = IOUtils
|
||||||
|
.toString(
|
||||||
|
PublicationToOafTest.class.getResourceAsStream("publication.json"));
|
||||||
|
JsonElement j = new JsonParser().parse(jsonPublication);
|
||||||
|
logger.info("json publication loaded: " + j.toString());
|
||||||
|
Publication oafPublication = (Publication) PublicationToOaf
|
||||||
|
.generatePublicationActionsFromDump(j.getAsJsonObject());
|
||||||
|
assertNotNull(oafPublication.getId());
|
||||||
|
assertNotNull(oafPublication.getOriginalId());
|
||||||
|
assertEquals(oafPublication.getOriginalId().get(0), "60153327");
|
||||||
|
logger.info("oafPublication.getId(): " + oafPublication.getId());
|
||||||
|
assertEquals(
|
||||||
|
oafPublication.getTitle().get(0).getValue(),
|
||||||
|
"Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp");
|
||||||
|
assertNotNull(oafPublication.getLastupdatetimestamp());
|
||||||
|
assertNotNull(oafPublication.getDateofcollection());
|
||||||
|
assertNotNull(oafPublication.getDateoftransformation());
|
||||||
|
assertTrue(oafPublication.getAuthor().size() == 7);
|
||||||
|
oafPublication.getAuthor().forEach(a -> {
|
||||||
|
assertNotNull(a.getFullname());
|
||||||
|
assertNotNull(a.getRank());
|
||||||
|
logger.info("a.getFullname(): " + a.getFullname());
|
||||||
|
if (a.getName() != null) {
|
||||||
|
logger.info("a.getName(): " + a.getName());
|
||||||
|
}
|
||||||
|
if (a.getSurname() != null) {
|
||||||
|
logger.info("a.getSurname(): " + a.getSurname());
|
||||||
|
}
|
||||||
|
logger.info("a.getRank(): " + a.getRank());
|
||||||
|
if (a.getPid() != null) {
|
||||||
|
logger.info("a.getPid(): " + a.getPid().get(0).getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
|
assertNotNull(oafPublication.getCollectedfrom());
|
||||||
|
if (oafPublication.getSource() != null) {
|
||||||
|
logger.info((oafPublication.getSource().get(0).getValue()));
|
||||||
|
}
|
||||||
|
if (oafPublication.getExternalReference() != null) {
|
||||||
|
oafPublication.getExternalReference().forEach(e -> {
|
||||||
|
assertNotNull(e.getRefidentifier());
|
||||||
|
assertEquals(e.getQualifier().getSchemeid(), "dnet:pid_types");
|
||||||
|
});
|
||||||
|
}
|
||||||
|
assertNotNull(oafPublication.getInstance());
|
||||||
|
oafPublication.getInstance().forEach(i -> {
|
||||||
|
assertNotNull(i.getInstancetype().getClassid());
|
||||||
|
logger.info("i.getInstancetype().getClassid(): " + i.getInstancetype().getClassid());
|
||||||
|
assertNotNull(i.getInstancetype().getClassname());
|
||||||
|
logger.info("i.getInstancetype().getClassname(): " + i.getInstancetype().getClassname());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
|
@ -95,7 +95,8 @@ public class OrcidNoDoiTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void authorMatchTest() throws Exception {
|
@Ignore
|
||||||
|
private void authorMatchTest() throws Exception {
|
||||||
logger.info("running authorSimpleMatchTest ....");
|
logger.info("running authorSimpleMatchTest ....");
|
||||||
String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml";
|
String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml";
|
||||||
AuthorData author = new AuthorData();
|
AuthorData author = new AuthorData();
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
{"oid":"0000-0002-4147-3387","id":"60153327","sourceName":"The Chinese University of Hong Kong","type":"conference-paper","titles":["Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp"],"extIds":[{"type":"wosuid","value":"000425015800225","relationShip":"self"},{"type":"other-id","value":"441f521e-ab19-448d-ba32-83157b348ada","relationShip":"self"}],"publicationDates":[],"contributors":[{"sequence":"1","oid":"0000-0002-4147-3387","name":"Elaine","surname":"Chow","creditName":"Elaine Chow"},{"sequence":"2","creditName":"Victor Tsui"},{"sequence":"3","creditName":"Achim Müller"},{"sequence":"4","creditName":"Vincy Lee"},{"sequence":"5","creditName":"Lucia Krivánekova"},{"sequence":"6","creditName":"Roland Krivánek"},{"sequence":"7","creditName":"Juliana CN Chan"}]}
|
Is the caller expecting the
null
? Otherwise this would likely produce a NPE.yes, there is a filter for null value:
JavaRDD oafPublicationRDD = enrichedWorksRDD
.map(
e -> {
return (Publication) publicationToOaf
.generatePublicationActionsFromJson(e._2());
})
.filter(p -> p != null);