orcid-no-doi #43

Merged
claudio.atzori merged 45 commits from enrico.ottonello/dnet-hadoop:orcid-no-doi into master 2020-12-02 10:55:12 +01:00
8 changed files with 650 additions and 532 deletions
Showing only changes of commit 1729cc5cf3 - Show all commits

View File

@ -1,420 +0,0 @@
package eu.dnetlib.doiboost.orcidnodoi.oaf;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.doiboost.orcidnodoi.SparkGenEnrichedOrcidWorks;
import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility;
import eu.dnetlib.doiboost.orcidnodoi.util.Pair;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.stream.Collectors;
import static eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility.*;
public class OrcidWorkToOAF {
static Logger logger = LoggerFactory.getLogger(OrcidWorkToOAF.class);
public static final String ORCID = "ORCID";
public final static String orcidPREFIX = "orcid_______";
public static final String OPENAIRE_PREFIX = "openaire____";
public static final String SEPARATOR = "::";
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {
{
put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
}
};
// json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname
private static Map<String, Pair<String, String>> externalIds = new HashMap<String, Pair<String, String>>() {
{
put("ark".toLowerCase(), new Pair<>("ark", "ark"));
put("arxiv".toLowerCase(), new Pair<>("arxiv", "arXiv"));
put("pmc".toLowerCase(), new Pair<>("pmc", "pmc"));
put("pmid".toLowerCase(), new Pair<>("pmid", "pmid"));
put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcidworkid"));
put("urn".toLowerCase(), new Pair<>("urn", "urn"));
}
};
static Map<String, Map<String, String>> typologiesMapping;
static {
try {
final String tt = IOUtils.toString(OrcidWorkToOAF.class.getResourceAsStream(
"/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json"));
typologiesMapping = new Gson().fromJson(tt, Map.class);
} catch (final Exception e) {
logger.error("loading typologies", e);
}
}
public static final String PID_TYPES = "dnet:pid_types";
/**
 * Converts one ORCID work (JSON) into a Publication.
 * The record is rejected (null is returned) when isValid fails, when no title is found,
 * when the "type" field is blank, or when no author can be built.
 *
 * @param rootElement the JSON object describing the work
 * @param setName not used by this method
 * @return the populated Publication, or null when the record is filtered out
 */
public static Oaf generatePublicationActionsFromDump(final JsonObject rootElement, final String setName) {
if (!isValid(rootElement/*, context*/)) { return null; }
Publication publication = new Publication();
final DataInfo dataInfo = new DataInfo();
dataInfo.setDeletedbyinference(false);
dataInfo.setInferred(false);
dataInfo.setTrust("0.9");
dataInfo.setProvenanceaction(
mapQualifier(
"sysimport:actionset:orcidworks-no-doi",
"sysimport:actionset:orcidworks-no-doi",
"dnet:provenanceActions",
"dnet:provenanceActions"));
publication.setDataInfo(dataInfo);
publication.setLastupdatetimestamp(new Date().getTime());
// NOTE(review): the collection date is a hard-coded literal — confirm this is intended
publication.setDateofcollection("2019-10-22");
publication.setDateoftransformation(DumpToActionsUtility.now_ISO8601());
// Adding external ids
// NOTE(review): classid is read from the pair's value and classname from its key, while the
// comment on externalIds suggests the opposite order — confirm against Pair's semantics.
// NOTE(review): assumes getExternalReference() returns a non-null list on a new Publication — TODO confirm
externalIds.keySet().stream()
.forEach(jsonExtId -> {
final String classid = externalIds.get(jsonExtId.toLowerCase()).getValue();
final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey();
final String extId = getStringValue(rootElement, jsonExtId);
if (StringUtils.isNotBlank(extId)) {
publication.getExternalReference().add(
convertExtRef(extId, classid, classname, "dnet:pid_types", "dnet:pid_types"));
}
});
// Adding source
// final String source = getStringValue(rootElement, "source");
// if (StringUtils.isNotBlank(source)) {
// metadata.addSource(StringField.newBuilder().setValue(source).build());
// }
// Adding titles
final List<String> titles = createRepeatedField(rootElement, "titles");
if (titles==null || titles.isEmpty()) {
// context.incrementCounter("filtered", "title_not_found", 1);
return null;
}
Qualifier q = mapQualifier("main title","main title","dnet:dataCite_title","dnet:dataCite_title");
// mapStructuredProperty returns null for a blank title, so the list may contain null entries
publication.setTitle(
titles
.stream()
.map(t -> {
return mapStructuredProperty(t, q, null);
})
.collect(Collectors.toList()));
// Adding identifier
// The id is "50|" + orcidPREFIX + "::" + md5 of the lower-cased work id; when the work has no
// id, the md5 of the comma-joined, lower-cased titles is used instead.
final String id = getStringValue(rootElement, "id");
String sourceId = null;
if (id != null) {
publication.setOriginalId(Arrays.asList(id));
sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(id.toLowerCase()));
} else {
String mergedTitle = titles.stream().map(Object::toString).collect(Collectors.joining(","));
sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(mergedTitle.toLowerCase()));
}
publication.setId(sourceId);
// Adding relevant date
settingRelevantDate(rootElement, publication, "publication_date", "issued", true);
// Adding collectedfrom
publication.setCollectedfrom(Arrays.asList(createCollectedFrom()));
// Adding type
final String type = getStringValue(rootElement, "type");
String cobjValue = "";
if (StringUtils.isNotBlank(type)) {
publication.setResourcetype(mapQualifier(type, type, "dnet:dataCite_resource", "dnet:dataCite_resource"));
// isValid has already checked that typologiesMapping contains this type
final String typeValue = typologiesMapping.get(type).get("value");
cobjValue = typologiesMapping.get(type).get("cobj");
final Instance instance = new Instance();
// Adding hostedby
instance.setHostedby(createHostedBy());
// Adding url
final List<String> urls = createRepeatedField(rootElement, "urls");
if (urls!=null && !urls.isEmpty()) {
instance.setUrl(urls);
}
final String pubDate = getPublicationDate(rootElement, "publication_date");
if (StringUtils.isNotBlank(pubDate)) {
instance.setDateofacceptance(mapStringField(pubDate, null));
}
instance.setCollectedfrom(createCollectedFrom());
// Adding accessright
instance.setAccessright(mapQualifier("UNKNOWN", "UNKNOWN", "dnet:access_modes", "dnet:access_modes"));
// Adding type
instance.setInstancetype(mapQualifier(cobjValue, typeValue, "dnet:publication_resource", "dnet:publication_resource"));
publication.setInstance(Arrays.asList(instance));
} else {
// context.incrementCounter("filtered", "type_not_found", 1);
return null;
}
// Adding authors
final List<Author> authors = createAuthors(rootElement);
if (authors != null && authors.size() > 0) {
publication.setAuthor(authors);
} else {
// context.incrementCounter("filtered", "author_not_found", 1);
return null;
}
// The result type is derived from the cobj code of the work type
String classValue = getDefaultResulttype(cobjValue);
publication.setResulttype(mapQualifier(classValue, classValue,"dnet:result_typologies", "dnet:result_typologies"));
return publication;
}
/**
 * Builds the author list from the "authors" JSON array of a work.
 *
 * Each array element yields one Author. The rank follows the "seq" field: "first" entries are
 * numbered by their own counter, "additional" entries take the previous rank plus one, any
 * other non-blank value uses a separate counter, and a blank "seq" keeps the rank of the
 * previous entry. Authors with an "oid" get it as pid and keep name/surname as given; the
 * others are normalised through PacePerson.
 *
 * @param root the JSON object describing the work
 * @return the authors, or null when "authors" is missing or is not a JSON array
 */
public static List<Author> createAuthors(final JsonObject root) {
    final String authorsJSONFieldName = "authors";
    if (!root.has(authorsJSONFieldName) || !root.get(authorsJSONFieldName).isJsonArray()) {
        return null;
    }
    final List<Author> authors = new ArrayList<>();
    int firstCounter = 0;
    int defaultCounter = 0;
    int rank = 1;
    int currentRank = 0;
    for (final JsonElement item : root.getAsJsonArray(authorsJSONFieldName)) {
        final Author author = new Author();
        // Test the element kind BEFORE converting it: getAsJsonObject() throws
        // IllegalStateException on anything that is not a JSON object.
        if (item.isJsonObject()) {
            final JsonObject jsonAuthor = item.getAsJsonObject();
            final String surname = getStringValue(jsonAuthor, "surname");
            final String name = getStringValue(jsonAuthor, "name");
            final String oid = getStringValue(jsonAuthor, "oid");
            final String seq = getStringValue(jsonAuthor, "seq");
            if (StringUtils.isNotBlank(seq)) {
                if (seq.equals("first")) {
                    firstCounter += 1;
                    rank = firstCounter;
                } else if (seq.equals("additional")) {
                    rank = currentRank + 1;
                } else {
                    defaultCounter += 1;
                    rank = defaultCounter;
                }
            }
            if (StringUtils.isNotBlank(oid)) {
                author.setPid(Arrays.asList(mapAuthorId(oid)));
                // Join only the parts that are present, so a missing name or surname does not
                // end up as the literal text "null" in the full name.
                author.setFullname(
                    Arrays.asList(name, surname)
                        .stream()
                        .filter(StringUtils::isNotBlank)
                        .collect(Collectors.joining(" ")));
                if (StringUtils.isNotBlank(name)) {
                    author.setName(name);
                }
                if (StringUtils.isNotBlank(surname)) {
                    author.setSurname(surname);
                }
            } else {
                // Without an ORCID id, use the name if present, otherwise the surname
                String fullname = "";
                if (StringUtils.isNotBlank(name)) {
                    fullname = name;
                } else if (StringUtils.isNotBlank(surname)) {
                    fullname = surname;
                }
                final PacePerson p = new PacePerson(fullname, false);
                if (p.isAccurate()) {
                    author.setName(p.getNormalisedFirstName());
                    author.setSurname(p.getNormalisedSurname());
                    author.setFullname(p.getNormalisedFullname());
                } else {
                    author.setFullname(fullname);
                }
            }
        }
        author.setRank(rank);
        authors.add(author);
        currentRank = rank;
    }
    return authors;
}
/**
 * Reads a field that may hold either a JSON array or a single value.
 *
 * @return null when the field is missing, JSON null, or an array rejected by isValidJsonArray;
 *         the array values for an accepted array; otherwise a one-element list with the
 *         cleaned string value
 */
private static List<String> createRepeatedField(final JsonObject rootElement, final String fieldName) {
    final JsonElement element = rootElement.has(fieldName) ? rootElement.get(fieldName) : null;
    if (element == null || element.isJsonNull()) {
        return null;
    }
    if (!element.isJsonArray()) {
        // single value: strip the surrounding quotes, if any
        return Arrays.asList(cleanField(getStringValue(rootElement, fieldName)));
    }
    return isValidJsonArray(rootElement, fieldName) ? getArrayValues(rootElement, fieldName) : null;
}
/**
 * Removes one pair of enclosing double quotes from the value, if present.
 *
 * @param value the raw value, possibly null
 * @return the value without its enclosing quotes; null, empty and unquoted values are returned unchanged
 */
private static String cleanField(String value) {
    // At least two characters are required: for a lone '"' the first and last character are the
    // same one, and substring(1, 0) would throw StringIndexOutOfBoundsException.
    if (value != null && value.length() >= 2 && value.charAt(0) == '"'
        && value.charAt(value.length() - 1) == '"') {
        return value.substring(1, value.length() - 1);
    }
    return value;
}
/**
 * Copies the publication date of the work onto the publication.
 * Nothing is set when no usable date is found under jsonKey.
 *
 * @param rootElement the JSON object describing the work
 * @param publication the publication to update
 * @param jsonKey name of the JSON member holding the date object
 * @param dictionaryKey class id/name of the dnet:dataCite_date qualifier given to the relevant date
 * @param addToDateOfAcceptance when true the date is also stored as date of acceptance
 */
private static void settingRelevantDate(final JsonObject rootElement,
    final Publication publication,
    final String jsonKey,
    final String dictionaryKey,
    final boolean addToDateOfAcceptance) {
    // Honour the jsonKey argument instead of a hard-coded "publication_date"
    final String pubDate = getPublicationDate(rootElement, jsonKey);
    if (StringUtils.isBlank(pubDate)) {
        return;
    }
    if (addToDateOfAcceptance) {
        publication.setDateofacceptance(mapStringField(pubDate, null));
    }
    final Qualifier q = mapQualifier(dictionaryKey, dictionaryKey, "dnet:dataCite_date", "dnet:dataCite_date");
    publication.setRelevantdate(
        Arrays.asList(pubDate)
            .stream()
            .map(r -> mapStructuredProperty(r, q, null))
            .collect(Collectors.toList()));
}
/**
 * Builds a "year-month-day" string from the year/month/day members of a JSON date object.
 * A missing month or day is replaced by "01".
 *
 * @param rootElement the JSON object describing the work
 * @param jsonKey name of the member holding the date object
 * @return the date string, or null when the member is missing or is not a JSON object, when
 *         the year is blank, or when isValidDate rejects the result
 */
private static String getPublicationDate(final JsonObject rootElement,
    final String jsonKey) {
    // getAsJsonObject casts the member, so a JSON null or a primitive stored under jsonKey
    // would raise ClassCastException; treat those cases like a missing date.
    if (!rootElement.has(jsonKey) || !rootElement.get(jsonKey).isJsonObject()) {
        return null;
    }
    final JsonObject pubDateJson = rootElement.getAsJsonObject(jsonKey);
    final String year = getStringValue(pubDateJson, "year");
    final String month = getStringValue(pubDateJson, "month");
    final String day = getStringValue(pubDateJson, "day");
    if (StringUtils.isBlank(year)) {
        return null;
    }
    final StringBuilder pubDate = new StringBuilder(year);
    if (StringUtils.isNotBlank(month)) {
        pubDate.append('-').append(month);
        pubDate.append('-').append(StringUtils.isNotBlank(day) ? day : "01");
    } else {
        // without a month the day is ignored as well
        pubDate.append("-01-01");
    }
    final String result = pubDate.toString();
    return isValidDate(result) ? result : null;
}
/**
 * Tells whether a work can be converted: its "type" must be a key of typologiesMapping and
 * its "titles" field must pass isValidJsonArray.
 */
protected static boolean isValid(final JsonObject rootElement/*, final Reporter context*/) {
    final boolean knownType = typologiesMapping.containsKey(getStringValue(rootElement, "type"));
    // context.incrementCounter("filtered", "unknowntype_" + type, 1);
    // context.incrementCounter("filtered", "invalid_title", 1);
    return knownType && isValidJsonArray(rootElement, "titles");
}
/**
 * Checks that a field is present and usable.
 *
 * @return false when the field is missing or JSON null, or when it is an array that is empty
 *         or whose first element is JSON null; true otherwise (including non-array values)
 */
private static boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) {
    if (!rootElement.has(fieldName)) {
        return false;
    }
    final JsonElement jsonElement = rootElement.get(fieldName);
    if (jsonElement.isJsonNull()) {
        return false;
    }
    if (jsonElement.isJsonArray()) {
        final JsonArray jsonArray = jsonElement.getAsJsonArray();
        // An empty array has no first element: get(0) would throw IndexOutOfBoundsException
        if (jsonArray.size() == 0 || jsonArray.get(0).isJsonNull()) {
            return false;
        }
    }
    return true;
}
/**
 * Creates a Qualifier carrying the given class id/name and scheme id/name.
 */
private static Qualifier mapQualifier(String classId, String className, String schemeId, String schemeName) {
    final Qualifier result = new Qualifier();
    // scheme first, then class: the four setters are independent of each other
    result.setSchemeid(schemeId);
    result.setSchemename(schemeName);
    result.setClassid(classId);
    result.setClassname(className);
    return result;
}
/**
 * Creates an ExternalReference for the given identifier, qualified by the given class and scheme.
 */
private static ExternalReference convertExtRef(String extId, String classId, String className, String schemeId, String schemeName) {
    final Qualifier refQualifier = mapQualifier(classId, className, schemeId, schemeName);
    final ExternalReference reference = new ExternalReference();
    reference.setRefidentifier(extId);
    reference.setQualifier(refQualifier);
    return reference;
}
/**
 * Wraps a value into a StructuredProperty with the given qualifier and data info.
 *
 * @return the property, or null when the value is null or blank
 */
private static StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) {
    // StringUtils.isNotBlank is null-safe, so no separate null test is needed
    if (StringUtils.isNotBlank(value)) {
        final StructuredProperty property = new StructuredProperty();
        property.setValue(value);
        property.setQualifier(qualifier);
        property.setDataInfo(dataInfo);
        return property;
    }
    return null;
}
/**
 * Wraps a value into a Field with the given data info.
 *
 * @return the field, or null when the value is null or blank
 */
private static Field<String> mapStringField(String value, DataInfo dataInfo) {
    // StringUtils.isNotBlank is null-safe, so no separate null test is needed
    if (StringUtils.isNotBlank(value)) {
        final Field<String> field = new Field<>();
        field.setValue(value);
        field.setDataInfo(dataInfo);
        return field;
    }
    return null;
}
/**
 * Creates the "collected from" reference: value ORCID with its fixed datasource key.
 */
private static KeyValue createCollectedFrom() {
    final String datasourceKey = "10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a";
    final KeyValue collectedFrom = new KeyValue();
    collectedFrom.setKey(datasourceKey);
    collectedFrom.setValue(ORCID);
    return collectedFrom;
}
/**
 * Creates the "hosted by" reference: value "Unknown Repository" with its fixed datasource key.
 */
private static KeyValue createHostedBy() {
    final String datasourceKey = "10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c";
    final KeyValue hostedBy = new KeyValue();
    hostedBy.setKey(datasourceKey);
    hostedBy.setValue("Unknown Repository");
    return hostedBy;
}
/**
 * Wraps an ORCID identifier into a StructuredProperty whose qualifier has class id and
 * class name "ORCID" (no scheme is set).
 */
private static StructuredProperty mapAuthorId(String orcidId) {
    final Qualifier orcidQualifier = new Qualifier();
    orcidQualifier.setClassname("ORCID");
    orcidQualifier.setClassid("ORCID");
    final StructuredProperty authorPid = new StructuredProperty();
    authorPid.setQualifier(orcidQualifier);
    authorPid.setValue(orcidId);
    return authorPid;
}
}

View File

@ -0,0 +1,456 @@
package eu.dnetlib.doiboost.orcidnodoi.oaf;
import static eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility.*;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility;
import eu.dnetlib.doiboost.orcidnodoi.util.Pair;
public class PublicationToOaf {
static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class);
public static final String ORCID = "ORCID";
public final static String orcidPREFIX = "orcid_______";
public static final String OPENAIRE_PREFIX = "openaire____";
public static final String SEPARATOR = "::";
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {
{
put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
}
};
// json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname
private static Map<String, Pair<String, String>> externalIds = new HashMap<String, Pair<String, String>>() {
{
put("ark".toLowerCase(), new Pair<>("ark", "ark"));
put("arxiv".toLowerCase(), new Pair<>("arxiv", "arXiv"));
put("pmc".toLowerCase(), new Pair<>("pmc", "pmc"));
put("pmid".toLowerCase(), new Pair<>("pmid", "pmid"));
put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcidworkid"));
put("urn".toLowerCase(), new Pair<>("urn", "urn"));
}
};
static Map<String, Map<String, String>> typologiesMapping;
static {
try {
final String tt = IOUtils
.toString(
PublicationToOaf.class
.getResourceAsStream(
"/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json"));
typologiesMapping = new Gson().fromJson(tt, Map.class);
} catch (final Exception e) {
logger.error("loading typologies", e);
}
}
public static final String PID_TYPES = "dnet:pid_types";
public static Oaf generatePublicationActionsFromDump(final JsonObject rootElement) {
logger.debug("generatePublicationActionsFromDump ...");
if (!isValid(rootElement/* , context */)) {
logger.error("publication not valid");
return null;
}
Publication publication = new Publication();
final DataInfo dataInfo = new DataInfo();
dataInfo.setDeletedbyinference(false);
dataInfo.setInferred(false);
dataInfo.setTrust("0.9");
dataInfo
.setProvenanceaction(
mapQualifier(
"sysimport:actionset:orcidworks-no-doi",
"sysimport:actionset:orcidworks-no-doi",
"dnet:provenanceActions",
"dnet:provenanceActions"));
publication.setDataInfo(dataInfo);
publication.setLastupdatetimestamp(new Date().getTime());
publication.setDateofcollection("2019-10-22");
publication.setDateoftransformation(DumpToActionsUtility.now_ISO8601());
// Adding external ids
externalIds

This should not happen as this is statically defined, but please let the exception propagate with some subclass of Throwable so that it will break immediately. Otherwise the typologiesMapping variable will stay defined as null causing the 1st usage to break with a NPE.

This should not happen as this is statically defined, but please let the exception propagate with some subclass of `Throwable` so that it will break immediately. Otherwise the `typologiesMapping` variable will stay defined as `null` causing the 1st usage to break with a NPE.
.keySet()
.stream()
.forEach(jsonExtId -> {
final String classid = externalIds.get(jsonExtId.toLowerCase()).getValue();
final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey();
final String extId = getStringValue(rootElement, jsonExtId);
if (StringUtils.isNotBlank(extId)) {
publication
.getExternalReference()
.add(
convertExtRef(extId, classid, classname, "dnet:pid_types", "dnet:pid_types"));
}
});
// Adding source
final String source = getStringValue(rootElement, "sourceName");
if (StringUtils.isNotBlank(source)) {
publication.setSource(Arrays.asList(mapStringField(source, null)));
}
Review

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
Review

yes, there is a filter for null value:
JavaRDD oafPublicationRDD = enrichedWorksRDD
.map(
e -> {
return (Publication) publicationToOaf
.generatePublicationActionsFromJson(e._2());
})
.filter(p -> p != null);

yes, there is a filter for null value: JavaRDD<Publication> oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);
// Adding titles
final List<String> titles = createRepeatedField(rootElement, "titles");
if (titles == null || titles.isEmpty()) {
logger.error("titles not found");
// context.incrementCounter("filtered", "title_not_found", 1);
return null;
Review

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
Review

yes, there is a filter for null value:
JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);

yes, there is a filter for null value: JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);
}
Qualifier q = mapQualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
publication
.setTitle(
titles
.stream()
.map(t -> {
return mapStructuredProperty(t, q, null);
})
.collect(Collectors.toList()));
// Adding identifier
final String id = getStringValue(rootElement, "id");
String sourceId = null;
if (id != null) {
publication.setOriginalId(Arrays.asList(id));
sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(id.toLowerCase()));
} else {
String mergedTitle = titles.stream().map(Object::toString).collect(Collectors.joining(","));
sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(mergedTitle.toLowerCase()));
}
publication.setId(sourceId);
// Adding relevant date
settingRelevantDate(rootElement, publication, "publication_date", "issued", true);
// Adding collectedfrom
publication.setCollectedfrom(Arrays.asList(createCollectedFrom()));
// Adding type
final String type = getStringValue(rootElement, "type");
String cobjValue = "";
if (StringUtils.isNotBlank(type)) {
publication.setResourcetype(mapQualifier(type, type, "dnet:dataCite_resource", "dnet:dataCite_resource"));
final String typeValue = typologiesMapping.get(type).get("value");
cobjValue = typologiesMapping.get(type).get("cobj");
final Instance instance = new Instance();
// Adding hostedby
instance.setHostedby(createHostedBy());
// Adding url
final List<String> urls = createRepeatedField(rootElement, "urls");
if (urls != null && !urls.isEmpty()) {
instance.setUrl(urls);
}
final String pubDate = getPublicationDate(rootElement, "publication_date");
if (StringUtils.isNotBlank(pubDate)) {
instance.setDateofacceptance(mapStringField(pubDate, null));
}
Review

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
Review

yes, there is a check on null value

yes, there is a check on null value
instance.setCollectedfrom(createCollectedFrom());
// Adding accessright
instance.setAccessright(mapQualifier("UNKNOWN", "UNKNOWN", "dnet:access_modes", "dnet:access_modes"));
// Adding type
instance
.setInstancetype(
mapQualifier(cobjValue, typeValue, "dnet:publication_resource", "dnet:publication_resource"));
publication.setInstance(Arrays.asList(instance));
} else {
logger.error("type not found");
// context.incrementCounter("filtered", "type_not_found", 1);
return null;
}
// Adding authors
final List<Author> authors = createAuthors(rootElement);
if (authors != null && authors.size() > 0) {
publication.setAuthor(authors);
} else {
logger.error("authors not found");
// context.incrementCounter("filtered", "author_not_found", 1);
return null;
}
String classValue = getDefaultResulttype(cobjValue);
publication
.setResulttype(mapQualifier(classValue, classValue, "dnet:result_typologies", "dnet:result_typologies"));
return publication;
}
public static List<Author> createAuthors(final JsonObject root) {
final String authorsJSONFieldName = "contributors";
if (root.has(authorsJSONFieldName) && root.get(authorsJSONFieldName).isJsonArray()) {
final List<Author> authors = new ArrayList<>();
final JsonArray jsonAuthors = root.getAsJsonArray(authorsJSONFieldName);
int firstCounter = 0;
int defaultCounter = 0;
int rank = 1;
int currentRank = 0;
for (final JsonElement item : jsonAuthors) {
final JsonObject jsonAuthor = item.getAsJsonObject();
final Author author = new Author();
if (item.isJsonObject()) {
final String creditname = getStringValue(jsonAuthor, "creditName");
final String surname = getStringValue(jsonAuthor, "surname");
final String name = getStringValue(jsonAuthor, "name");
final String oid = getStringValue(jsonAuthor, "oid");
final String seq = getStringValue(jsonAuthor, "sequence");
if (StringUtils.isNotBlank(seq)) {
if (seq.equals("first")) {
firstCounter += 1;
rank = firstCounter;
} else if (seq.equals("additional")) {
rank = currentRank + 1;
} else {
defaultCounter += 1;
rank = defaultCounter;
}
}
if (StringUtils.isNotBlank(oid)) {
author.setPid(Arrays.asList(mapAuthorId(oid)));
author.setFullname(name + " " + surname);
Review

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
Review

yes, there is a filter on null value:
JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);

yes, there is a filter on null value: JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);
if (StringUtils.isNotBlank(name)) {
author.setName(name);
}
if (StringUtils.isNotBlank(surname)) {
author.setSurname(surname);
}
} else {
PacePerson p = new PacePerson(creditname, false);
if (p.isAccurate()) {
author.setName(p.getNormalisedFirstName());
author.setSurname(p.getNormalisedSurname());
Review

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
Review

yes, there is a filter on null value: JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);

yes, there is a filter on null value: JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);
author.setFullname(p.getNormalisedFullname());
} else {
author.setFullname(creditname);
}
}
}
author.setRank(rank);
authors.add(author);
currentRank = rank;
}
return authors;
}
return null;
}
/**
 * Reads a field that may hold either a JSON array or a single value.
 *
 * @return null when the field is missing, JSON null, or an array rejected by isValidJsonArray;
 *         the array values for an accepted array; otherwise a one-element list with the
 *         cleaned string value
 */
private static List<String> createRepeatedField(final JsonObject rootElement, final String fieldName) {
    final JsonElement element = rootElement.has(fieldName) ? rootElement.get(fieldName) : null;
    if (element == null || element.isJsonNull()) {
        return null;
    }
    if (!element.isJsonArray()) {
        // single value: strip the surrounding quotes, if any
        return Arrays.asList(cleanField(getStringValue(rootElement, fieldName)));
    }
    return isValidJsonArray(rootElement, fieldName) ? getArrayValues(rootElement, fieldName) : null;
}
/**
 * Removes one pair of enclosing double quotes from the value, if present.
 *
 * @param value the raw value, possibly null
 * @return the value without its enclosing quotes; null, empty and unquoted values are returned unchanged
 */
private static String cleanField(String value) {
    // At least two characters are required: for a lone '"' the first and last character are the
    // same one, and substring(1, 0) would throw StringIndexOutOfBoundsException.
    if (value != null && value.length() >= 2 && value.charAt(0) == '"'
        && value.charAt(value.length() - 1) == '"') {
        return value.substring(1, value.length() - 1);
    }
    return value;
}
/**
 * Copies the publication date of the work onto the publication.
 * Nothing is set when no usable date is found under jsonKey.
 *
 * @param rootElement the JSON object describing the work
 * @param publication the publication to update
 * @param jsonKey name of the JSON member holding the date object
 * @param dictionaryKey class id/name of the dnet:dataCite_date qualifier given to the relevant date
 * @param addToDateOfAcceptance when true the date is also stored as date of acceptance
 */
private static void settingRelevantDate(final JsonObject rootElement,
    final Publication publication,
    final String jsonKey,
    final String dictionaryKey,
    final boolean addToDateOfAcceptance) {
    // Honour the jsonKey argument instead of a hard-coded "publication_date"
    final String pubDate = getPublicationDate(rootElement, jsonKey);
    if (StringUtils.isBlank(pubDate)) {
        return;
    }
    if (addToDateOfAcceptance) {
        publication.setDateofacceptance(mapStringField(pubDate, null));
    }
    final Qualifier q = mapQualifier(dictionaryKey, dictionaryKey, "dnet:dataCite_date", "dnet:dataCite_date");
    publication
        .setRelevantdate(
            Arrays
                .asList(pubDate)
                .stream()
                .map(r -> mapStructuredProperty(r, q, null))
                .collect(Collectors.toList()));
}
private static String getPublicationDate(final JsonObject rootElement,
final String jsonKey) {
final JsonObject pubDateJson = rootElement.getAsJsonObject(jsonKey);
if (pubDateJson == null) {
return null;
}
Review

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
Review

yes, there is a check on null value

yes, there is a check on null value
final String year = getStringValue(pubDateJson, "year");
final String month = getStringValue(pubDateJson, "month");
final String day = getStringValue(pubDateJson, "day");
if (StringUtils.isBlank(year)) {
Review

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
Review

yes, there is a check on null value

yes, there is a check on null value
return null;
}
String pubDate = "".concat(year);
Review

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
Review

yes, there is a check on null value

yes, there is a check on null value
if (StringUtils.isNotBlank(month)) {
pubDate = pubDate.concat("-" + month);
if (StringUtils.isNotBlank(day)) {
pubDate = pubDate.concat("-" + day);
Review

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
Review

yes, there is a check on null value

yes, there is a check on null value
} else {
pubDate += "-01";
}
} else {
pubDate += "-01-01";
}
if (isValidDate(pubDate)) {
return pubDate;
}
return null;
}
/**
 * Tells whether a work can be converted: its "type" must be a key of typologiesMapping and
 * its "titles" field must pass isValidJsonArray. The reason for a rejection is logged.
 */
protected static boolean isValid(final JsonObject rootElement/* , final Reporter context */) {
    final String type = getStringValue(rootElement, "type");
    if (typologiesMapping.containsKey(type)) {
        if (isValidJsonArray(rootElement, "titles")) {
            return true;
        }
        logger.error("invalid_title");
        // context.incrementCounter("filtered", "invalid_title", 1);
        return false;
    }
    logger.error("unknowntype_" + type);
    // context.incrementCounter("filtered", "unknowntype_" + type, 1);
    return false;
}
private static boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) {
if (!rootElement.has(fieldName)) {
return false;
}
final JsonElement jsonElement = rootElement.get(fieldName);
if (jsonElement.isJsonNull()) {
return false;
}
if (jsonElement.isJsonArray()) {
final JsonArray jsonArray = jsonElement.getAsJsonArray();
if (jsonArray.isJsonNull()) {
return false;
}
if (jsonArray.get(0).isJsonNull()) {
return false;
}
}
return true;

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.

yes, there is this check on the value: StringUtils.isNotBlank

yes, there is this check on the value: StringUtils.isNotBlank
}
private static Qualifier mapQualifier(String classId, String className, String schemeId, String schemeName) {

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.

yes, there is this check on the value: StringUtils.isNotBlank

yes, there is this check on the value: StringUtils.isNotBlank
final Qualifier qualifier = new Qualifier();
qualifier.setClassid(classId);
qualifier.setClassname(className);
qualifier.setSchemeid(schemeId);
qualifier.setSchemename(schemeName);
return qualifier;
}
Review

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
Review

yes, there is this check on the value: StringUtils.isNotBlank

yes, there is this check on the value: StringUtils.isNotBlank
/**
 * Creates an ExternalReference for the given identifier, qualified by the given class and scheme.
 */
private static ExternalReference convertExtRef(String extId, String classId, String className, String schemeId,
    String schemeName) {
    final Qualifier refQualifier = mapQualifier(classId, className, schemeId, schemeName);
    final ExternalReference reference = new ExternalReference();
    reference.setRefidentifier(extId);
    reference.setQualifier(refQualifier);
    return reference;
}
private static StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) {
if (value == null | StringUtils.isBlank(value)) {
return null;
}
final StructuredProperty structuredProperty = new StructuredProperty();
structuredProperty.setValue(value);
Review

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.
Review

yes, there is this check on the value: StringUtils.isNotBlank

yes, there is this check on the value: StringUtils.isNotBlank
structuredProperty.setQualifier(qualifier);
structuredProperty.setDataInfo(dataInfo);
return structuredProperty;
}
/**
 * Wraps a value into a Field with the given data info.
 *
 * @return the field, or null when the value is null or blank
 */
private static Field<String> mapStringField(String value, DataInfo dataInfo) {
    // StringUtils.isNotBlank is null-safe, so no separate null test is needed
    if (StringUtils.isNotBlank(value)) {
        final Field<String> field = new Field<>();
        field.setValue(value);
        field.setDataInfo(dataInfo);
        return field;
    }
    return null;
}
/**
 * Creates the "collected from" reference: value ORCID with its fixed datasource key.
 */
private static KeyValue createCollectedFrom() {
    final String datasourceKey = "10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a";
    final KeyValue collectedFrom = new KeyValue();
    collectedFrom.setKey(datasourceKey);
    collectedFrom.setValue(ORCID);
    return collectedFrom;
}
/**
 * Creates the "hosted by" reference: value "Unknown Repository" with its fixed datasource key.
 */
private static KeyValue createHostedBy() {
    final String datasourceKey = "10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c";
    final KeyValue hostedBy = new KeyValue();
    hostedBy.setKey(datasourceKey);
    hostedBy.setValue("Unknown Repository");
    return hostedBy;
}
/**
 * Wraps an ORCID identifier into a StructuredProperty whose qualifier has class id and
 * class name "ORCID" (no scheme is set).
 */
private static StructuredProperty mapAuthorId(String orcidId) {
    final Qualifier orcidQualifier = new Qualifier();
    orcidQualifier.setClassname("ORCID");
    orcidQualifier.setClassid("ORCID");
    final StructuredProperty authorPid = new StructuredProperty();
    authorPid.setQualifier(orcidQualifier);
    authorPid.setValue(orcidId);
    return authorPid;
}
}

View File

@ -1,107 +1,109 @@
package eu.dnetlib.doiboost.orcidnodoi.util;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import org.apache.commons.lang3.StringUtils;
package eu.dnetlib.doiboost.orcidnodoi.util;
import java.text.SimpleDateFormat;
import java.util.*;
import org.apache.commons.lang3.StringUtils;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
public class DumpToActionsUtility {
// Formatter behind now_ISO8601(): pattern yyyy-MM-dd'T'HH:mm:ssZ, US locale.
// now_ISO8601() synchronizes on this instance while formatting.
// NOTE(review): the declaration is shown twice in this view (before/after reformatting).
private static final SimpleDateFormat ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US);
private static final SimpleDateFormat ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US);
/**
 * Reads a member of a JSON object as a string.
 *
 * @param root JSON object to read from
 * @param key  name of the member
 * @return the member's string value, or {@code null} when the member is absent or is JSON null
 */
public static String getStringValue(final JsonObject root, final String key) {
    final boolean missingOrNull = !root.has(key) || root.get(key).isJsonNull();
    return missingOrNull ? null : root.get(key).getAsString();
}
/**
 * Reads a member of a JSON object as a string.
 *
 * @param root JSON object to read from
 * @param key  name of the member
 * @return the member's string value, or {@code null} when the member is absent or is JSON null
 */
public static String getStringValue(final JsonObject root, final String key) {
    final boolean missingOrNull = !root.has(key) || root.get(key).isJsonNull();
    return missingOrNull ? null : root.get(key).getAsString();
}
/**
 * Collects the non-blank string elements of a JSON array member.
 *
 * @param root JSON object to read from
 * @param key  name of the member expected to hold a JSON array
 * @return a new list holding, in array order, every element whose string form is not blank;
 *         an empty list when the member is absent or is not a JSON array
 */
public static List<String> getArrayValues(final JsonObject root, final String key) {
if (root.has(key) && root.get(key).isJsonArray()) {
final JsonArray asJsonArray = root.get(key).getAsJsonArray();
final List<String> result = new ArrayList<>();
// NOTE(review): from here this view repeats the method header and body (before/after
// reformatting), with a review thread in between.
public static List<String> getArrayValues(final JsonObject root, final String key) {
if (root.has(key) && root.get(key).isJsonArray()) {

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the null? Otherwise this would likely produce a NPE.

replaced null value with a more safe empty string

replaced null value with a more safe empty string
final JsonArray asJsonArray = root.get(key).getAsJsonArray();
final List<String> result = new ArrayList<>();
asJsonArray.forEach(it -> {
// keep only elements whose string form is not blank
if (StringUtils.isNotBlank(it.getAsString())) {
result.add(it.getAsString());
}
});
return result;
}
// member absent or not an array: callers get an empty list, never null
return new ArrayList<>();
}
asJsonArray.forEach(it -> {
// keep only elements whose string form is not blank
if (StringUtils.isNotBlank(it.getAsString())) {
result.add(it.getAsString());
}
});
return result;
}
// member absent or not an array: callers get an empty list, never null
return new ArrayList<>();
}
/**
 * Collects the JSON-object elements of a JSON array member.
 *
 * @param root JSON object to read from
 * @param key  name of the member expected to hold a JSON array
 * @return a new list holding, in array order, the elements that are JSON objects; elements of
 *         any other kind are skipped; an empty list when the member is absent or is not a
 *         JSON array
 */
public static List<JsonObject> getArrayObjects(final JsonObject root, final String key) {
    final List<JsonObject> objects = new ArrayList<>();
    if (!root.has(key) || !root.get(key).isJsonArray()) {
        return objects;
    }
    // getAsJsonObject() never returns null: it throws for an element that is not an object,
    // so the element kind has to be tested before the conversion.
    root.get(key).getAsJsonArray().forEach(element -> {
        if (element.isJsonObject()) {
            objects.add(element.getAsJsonObject());
        }
    });
    return objects;
}
/**
 * Collects the JSON-object elements of a JSON array member.
 *
 * @param root JSON object to read from
 * @param key  name of the member expected to hold a JSON array
 * @return a new list holding, in array order, the elements that are JSON objects; elements of
 *         any other kind are skipped; an empty list when the member is absent or is not a
 *         JSON array
 */
public static List<JsonObject> getArrayObjects(final JsonObject root, final String key) {
    final List<JsonObject> objects = new ArrayList<>();
    if (!root.has(key) || !root.get(key).isJsonArray()) {
        return objects;
    }
    // getAsJsonObject() never returns null: it throws for an element that is not an object,
    // so the element kind has to be tested before the conversion.
    root.get(key).getAsJsonArray().forEach(element -> {
        if (element.isJsonObject()) {
            objects.add(element.getAsJsonObject());
        }
    });
    return objects;
}
/**
 * Tells whether a string is laid out as four digits, a dash, two digits, a dash and two digits
 * (for example {@code 2020-12-02}). Only the layout is checked; month and day ranges are not.
 *
 * @param date candidate string, may be {@code null}
 * @return {@code true} when {@code date} is non-null and matches the layout in full
 */
public static boolean isValidDate(final String date) {
    // a missing date is simply not valid; calling matches() on null would throw
    return date != null && date.matches("\\d{4}-\\d{2}-\\d{2}");
}
/**
 * Tells whether a string is laid out as four digits, a dash, two digits, a dash and two digits
 * (for example {@code 2020-12-02}). Only the layout is checked; month and day ranges are not.
 *
 * @param date candidate string, may be {@code null}
 * @return {@code true} when {@code date} is non-null and matches the layout in full
 */
public static boolean isValidDate(final String date) {
    // a missing date is simply not valid; calling matches() on null would throw
    return date != null && date.matches("\\d{4}-\\d{2}-\\d{2}");
}
/**
 * Formats the current time with {@code ISO8601FORMAT} and inserts a colon before the last two
 * characters, turning a zone offset such as {@code +0100} into {@code +01:00}.
 *
 * @return the formatted current timestamp
 */
public static String now_ISO8601() { // NOPMD
    final StringBuilder timestamp = new StringBuilder();
    // the shared formatter is only used while holding its monitor
    synchronized (ISO8601FORMAT) {
        timestamp.append(ISO8601FORMAT.format(new Date()));
    }
    // convert YYYYMMDDTHH:mm:ss+HH00 into YYYYMMDDTHH:mm:ss+HH:00
    timestamp.insert(timestamp.length() - 2, ":");
    return timestamp.toString();
}
/**
 * Formats the current time with {@code ISO8601FORMAT} and inserts a colon before the last two
 * characters, turning a zone offset such as {@code +0100} into {@code +01:00}.
 *
 * @return the formatted current timestamp
 */
public static String now_ISO8601() { // NOPMD
    final StringBuilder timestamp = new StringBuilder();
    // the shared formatter is only used while holding its monitor
    synchronized (ISO8601FORMAT) {
        timestamp.append(ISO8601FORMAT.format(new Date()));
    }
    // convert YYYYMMDDTHH:mm:ss+HH00 into YYYYMMDDTHH:mm:ss+HH:00
    timestamp.insert(timestamp.length() - 2, ":");
    return timestamp.toString();
}
/**
 * Maps a category code to a default result type name.
 *
 * @param cobjcategory category code such as {@code "0029"}; a {@code null} argument makes the
 *                     switch throw {@link NullPointerException}
 * @return {@code "software"}, {@code "dataset"} or {@code "other"} for the codes listed in the
 *         switch, {@code "publication"} for every other value
 */
public static String getDefaultResulttype(final String cobjcategory) {
    final String resulttype;
    switch (cobjcategory) {
        case "0029":
            resulttype = "software";
            break;
        case "0021":
        case "0024":
        case "0025":
        case "0030":
            resulttype = "dataset";
            break;
        case "0000":
        case "0010":
        case "0018":
        case "0020":
        case "0022":
        case "0023":
        case "0026":
        case "0027":
        case "0028":
        case "0037":
            resulttype = "other";
            break;
        default:
            // Publication codes (0001, 0002, 0004-0009, 0011-0017, 0019, 0031, 0032) and any
            // unlisted code share the same answer.
            resulttype = "publication";
            break;
    }
    return resulttype;
}
/**
 * Maps a category code to a default result type name.
 *
 * @param cobjcategory category code such as {@code "0029"}; a {@code null} argument makes the
 *                     switch throw {@link NullPointerException}
 * @return {@code "software"}, {@code "dataset"} or {@code "other"} for the codes listed in the
 *         switch, {@code "publication"} for every other value
 */
public static String getDefaultResulttype(final String cobjcategory) {
    final String resulttype;
    switch (cobjcategory) {
        case "0029":
            resulttype = "software";
            break;
        case "0021":
        case "0024":
        case "0025":
        case "0030":
            resulttype = "dataset";
            break;
        case "0000":
        case "0010":
        case "0018":
        case "0020":
        case "0022":
        case "0023":
        case "0026":
        case "0027":
        case "0028":
        case "0037":
            resulttype = "other";
            break;
        default:
            // Publication codes (0001, 0002, 0004-0009, 0011-0017, 0019, 0031, 0032) and any
            // unlisted code share the same answer.
            resulttype = "publication";
            break;
    }
    return resulttype;
}
}

View File

@ -1,30 +1,32 @@
package eu.dnetlib.doiboost.orcidnodoi.util;
public class Pair<K, V> {
private K k;
private K k;
private V v;
private V v;
public Pair(K k, V v) {
this.k = k;
this.v = v;
}
public Pair(K k, V v) {
this.k = k;
this.v = v;
}
public K getKey() {
return k;
}
public K getKey() {
return k;
}
public V getValue() {
return v;
}
public V getValue() {
return v;
}
@Override
public boolean equals(Object obj) {
if (obj instanceof Pair<?, ?>) {
Pair<?, ?> tmp = (Pair<?, ?>) obj;
return k.equals(tmp.getKey()) && v.equals(tmp.getValue());
} else return false;
}
@Override
public boolean equals(Object obj) {
if (obj instanceof Pair<?, ?>) {
Pair<?, ?> tmp = (Pair<?, ?>) obj;
return k.equals(tmp.getKey()) && v.equals(tmp.getValue());
} else
return false;
}
}

View File

@ -54,7 +54,7 @@ public class OrcidClientTest {
}
// @Test
public void testLambdaFileParser() throws Exception {
private void testLambdaFileParser() throws Exception {
try (BufferedReader br = new BufferedReader(
new InputStreamReader(this.getClass().getResourceAsStream("last_modified.csv")))) {
String line;

View File

@ -0,0 +1,76 @@
package eu.dnetlib.doiboost.orcidnodoi;
import static org.junit.jupiter.api.Assertions.*;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.gson.JsonElement;
import com.google.gson.JsonParser;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf;
/**
 * Checks the conversion of the {@code publication.json} fixture into an OAF {@link Publication}.
 */
public class PublicationToOafTest {

    private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class);

    /**
     * Loads the fixture, converts it with
     * {@code PublicationToOaf.generatePublicationActionsFromDump} and verifies identifiers,
     * title, timestamps, authors, external references and instances of the result.
     */
    @Test
    public void convertOafPublicationTest() throws Exception {
        // NOTE(review): the fixture is read as UTF-8, the default JSON encoding; confirm the
        // resource is stored that way. The platform charset was used before.
        String jsonPublication = IOUtils
            .toString(
                PublicationToOafTest.class.getResourceAsStream("publication.json"), "UTF-8");
        JsonElement j = new JsonParser().parse(jsonPublication);
        logger.info("json publication loaded: {}", j);

        Publication oafPublication = (Publication) PublicationToOaf
            .generatePublicationActionsFromDump(j.getAsJsonObject());

        assertNotNull(oafPublication.getId());
        assertNotNull(oafPublication.getOriginalId());
        // JUnit expects (expected, actual); the reverse order yields misleading failure messages
        assertEquals("60153327", oafPublication.getOriginalId().get(0));
        logger.info("oafPublication.getId(): {}", oafPublication.getId());
        assertEquals(
            "Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp",
            oafPublication.getTitle().get(0).getValue());
        assertNotNull(oafPublication.getLastupdatetimestamp());
        assertNotNull(oafPublication.getDateofcollection());
        assertNotNull(oafPublication.getDateoftransformation());

        // assertEquals reports the actual size on failure, assertTrue(size == 7) does not
        assertEquals(7, oafPublication.getAuthor().size());
        oafPublication.getAuthor().forEach(a -> {
            assertNotNull(a.getFullname());
            assertNotNull(a.getRank());
            logger.info("a.getFullname(): {}", a.getFullname());
            if (a.getName() != null) {
                logger.info("a.getName(): {}", a.getName());
            }
            if (a.getSurname() != null) {
                logger.info("a.getSurname(): {}", a.getSurname());
            }
            logger.info("a.getRank(): {}", a.getRank());
            // an empty pid list must not fail the test with an index error
            if (a.getPid() != null && !a.getPid().isEmpty()) {
                logger.info("a.getPid(): {}", a.getPid().get(0).getValue());
            }
        });

        assertNotNull(oafPublication.getCollectedfrom());
        if (oafPublication.getSource() != null && !oafPublication.getSource().isEmpty()) {
            logger.info(oafPublication.getSource().get(0).getValue());
        }
        if (oafPublication.getExternalReference() != null) {
            oafPublication.getExternalReference().forEach(e -> {
                assertNotNull(e.getRefidentifier());
                assertEquals("dnet:pid_types", e.getQualifier().getSchemeid());
            });
        }

        assertNotNull(oafPublication.getInstance());
        oafPublication.getInstance().forEach(i -> {
            assertNotNull(i.getInstancetype().getClassid());
            logger.info("i.getInstancetype().getClassid(): {}", i.getInstancetype().getClassid());
            assertNotNull(i.getInstancetype().getClassname());
            logger.info("i.getInstancetype().getClassname(): {}", i.getInstancetype().getClassname());
        });
    }
}

View File

@ -95,7 +95,8 @@ public class OrcidNoDoiTest {
}
@Test
public void authorMatchTest() throws Exception {
@Ignore
private void authorMatchTest() throws Exception {
logger.info("running authorSimpleMatchTest ....");
String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml";
AuthorData author = new AuthorData();

View File

@ -0,0 +1 @@
{"oid":"0000-0002-4147-3387","id":"60153327","sourceName":"The Chinese University of Hong Kong","type":"conference-paper","titles":["Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp"],"extIds":[{"type":"wosuid","value":"000425015800225","relationShip":"self"},{"type":"other-id","value":"441f521e-ab19-448d-ba32-83157b348ada","relationShip":"self"}],"publicationDates":[],"contributors":[{"sequence":"1","oid":"0000-0002-4147-3387","name":"Elaine","surname":"Chow","creditName":"Elaine Chow"},{"sequence":"2","creditName":"Victor Tsui"},{"sequence":"3","creditName":"Achim Müller"},{"sequence":"4","creditName":"Vincy Lee"},{"sequence":"5","creditName":"Lucia Krivánekova"},{"sequence":"6","creditName":"Roland Krivánek"},{"sequence":"7","creditName":"Juliana CN Chan"}]}