orcid-no-doi #43

claudio.atzori merged 45 commits from enrico.ottonello/dnet-hadoop:orcid-no-doi into master 2020-12-02 10:55:12 +01:00
8 changed files with 650 additions and 532 deletions
Showing only changes of commit 1729cc5cf3 - Show all commits

View File

@ -1,420 +0,0 @@
package eu.dnetlib.doiboost.orcidnodoi.oaf;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.doiboost.orcidnodoi.SparkGenEnrichedOrcidWorks;
import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility;
import eu.dnetlib.doiboost.orcidnodoi.util.Pair;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.stream.Collectors;
import static eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility.*;
public class OrcidWorkToOAF {
static Logger logger = LoggerFactory.getLogger(OrcidWorkToOAF.class);
public static final String ORCID = "ORCID";
public final static String orcidPREFIX = "orcid_______";
public static final String OPENAIRE_PREFIX = "openaire____";
public static final String SEPARATOR = "::";
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {
put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
// json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname
private static Map<String, Pair<String, String>> externalIds = new HashMap<String, Pair<String, String>>() {
put("ark".toLowerCase(), new Pair<>("ark", "ark"));
put("arxiv".toLowerCase(), new Pair<>("arxiv", "arXiv"));
put("pmc".toLowerCase(), new Pair<>("pmc", "pmc"));
put("pmid".toLowerCase(), new Pair<>("pmid", "pmid"));
put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcidworkid"));
put("urn".toLowerCase(), new Pair<>("urn", "urn"));
static Map<String, Map<String, String>> typologiesMapping;
static {
try {
final String tt = IOUtils.toString(OrcidWorkToOAF.class.getResourceAsStream(
typologiesMapping = new Gson().fromJson(tt, Map.class);
} catch (final Exception e) {
logger.error("loading typologies", e);
public static final String PID_TYPES = "dnet:pid_types";
public static Oaf generatePublicationActionsFromDump(final JsonObject rootElement, final String setName) {
if (!isValid(rootElement/*, context*/)) { return null; }
Publication publication = new Publication();
final DataInfo dataInfo = new DataInfo();
publication.setLastupdatetimestamp(new Date().getTime());
// Adding external ids
.forEach(jsonExtId -> {
final String classid = externalIds.get(jsonExtId.toLowerCase()).getValue();
final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey();
final String extId = getStringValue(rootElement, jsonExtId);
if (StringUtils.isNotBlank(extId)) {
convertExtRef(extId, classid, classname, "dnet:pid_types", "dnet:pid_types"));
// Adding source
// final String source = getStringValue(rootElement, "source");
// if (StringUtils.isNotBlank(source)) {
// metadata.addSource(StringField.newBuilder().setValue(source).build());
// }
// Adding titles
final List<String> titles = createRepeatedField(rootElement, "titles");
if (titles==null || titles.isEmpty()) {
// context.incrementCounter("filtered", "title_not_found", 1);
return null;
Qualifier q = mapQualifier("main title","main title","dnet:dataCite_title","dnet:dataCite_title");
.map(t -> {
return mapStructuredProperty(t, q, null);
// Adding identifier
final String id = getStringValue(rootElement, "id");
String sourceId = null;
if (id != null) {
sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(id.toLowerCase()));
} else {
String mergedTitle = titles.stream().map(Object::toString).collect(Collectors.joining(","));
sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(mergedTitle.toLowerCase()));
// Adding relevant date
settingRelevantDate(rootElement, publication, "publication_date", "issued", true);
// Adding collectedfrom
// Adding type
final String type = getStringValue(rootElement, "type");
String cobjValue = "";
if (StringUtils.isNotBlank(type)) {
publication.setResourcetype(mapQualifier(type, type, "dnet:dataCite_resource", "dnet:dataCite_resource"));
final String typeValue = typologiesMapping.get(type).get("value");
cobjValue = typologiesMapping.get(type).get("cobj");
final Instance instance = new Instance();
// Adding hostedby
// Adding url
final List<String> urls = createRepeatedField(rootElement, "urls");
if (urls!=null && !urls.isEmpty()) {
final String pubDate = getPublicationDate(rootElement, "publication_date");
if (StringUtils.isNotBlank(pubDate)) {
instance.setDateofacceptance(mapStringField(pubDate, null));
// Adding accessright
instance.setAccessright(mapQualifier("UNKNOWN", "UNKNOWN", "dnet:access_modes", "dnet:access_modes"));
// Adding type
instance.setInstancetype(mapQualifier(cobjValue, typeValue, "dnet:publication_resource", "dnet:publication_resource"));
} else {
// context.incrementCounter("filtered", "type_not_found", 1);
return null;
// Adding authors
final List<Author> authors = createAuthors(rootElement);
if (authors != null && authors.size() > 0) {
} else {
// context.incrementCounter("filtered", "author_not_found", 1);
return null;
String classValue = getDefaultResulttype(cobjValue);
publication.setResulttype(mapQualifier(classValue, classValue,"dnet:result_typologies", "dnet:result_typologies"));
return publication;
/**
 * Builds the list of {@link Author} objects from the JSON array stored under the
 * "authors" key of {@code root}.
 *
 * NOTE(review): this excerpt is a lossy diff rendering; closing braces and several
 * statements (e.g. the lines that populate and add each author) are not visible here.
 *
 * @param root the JSON object describing one work
 * @return the list of authors; {@code null} is returned at the end of the method,
 *         presumably when "authors" is missing or is not a JSON array — confirm
 *         against the full source
 */
public static List<Author> createAuthors(final JsonObject root) {
final String authorsJSONFieldName = "authors";
if (root.has(authorsJSONFieldName) && root.get(authorsJSONFieldName).isJsonArray()) {
final List<Author> authors = new ArrayList<>();
final JsonArray jsonAuthors = root.getAsJsonArray(authorsJSONFieldName);
// Counters used to derive each author's rank from its "seq" value.
int firstCounter = 0;
int defaultCounter = 0;
int rank = 1;
int currentRank = 0;
for (final JsonElement item : jsonAuthors) {
// NOTE(review): getAsJsonObject() is called before the isJsonObject() check below;
// for a non-object element it would fail here, before the check is reached.
final JsonObject jsonAuthor = item.getAsJsonObject();
final Author author = new Author();
if (item.isJsonObject()) {
final String surname = getStringValue(jsonAuthor, "surname");
final String name = getStringValue(jsonAuthor, "name");
final String oid = getStringValue(jsonAuthor, "oid");
final String seq = getStringValue(jsonAuthor, "seq");
if (StringUtils.isNotBlank(seq)) {
// "first" entries are ranked by their own counter.
if (seq.equals("first")) {
firstCounter += 1;
rank = firstCounter;
// "additional" entries follow the rank of the previously processed author.
} else if (seq.equals("additional")) {
rank = currentRank + 1;
// Any other non-blank value is ranked by a separate counter.
} else {
defaultCounter += 1;
rank = defaultCounter;
if (StringUtils.isNotBlank(oid)) {
// NOTE(review): name or surname may be null here, which would yield the literal
// text "null" in the full name — the guarding conditions are not visible.
author.setFullname(name + " " + surname);
if (StringUtils.isNotBlank(name)) {
if (StringUtils.isNotBlank(surname)) {
} else {
// Fall back to whichever of name / surname is available.
String fullname = "";
if (StringUtils.isNotBlank(name)) {
fullname = name;
} else {
if (StringUtils.isNotBlank(surname)) {
fullname = surname;
PacePerson p = new PacePerson(fullname, false);
if (p.isAccurate()) {
else {
// Remember the rank just assigned so the next "additional" author can continue from it.
currentRank = rank;
return authors;
return null;
/**
 * Reads {@code fieldName} from {@code rootElement} as a list of strings.
 *
 * Returns {@code null} when the field is absent, is JSON null, or is an array that
 * {@code isValidJsonArray} rejects. An array is converted with {@code getArrayValues};
 * any other value is read as a single string, passed through {@code cleanField} and
 * wrapped in a one-element list.
 */
private static List<String> createRepeatedField(final JsonObject rootElement, final String fieldName) {
if (!rootElement.has(fieldName)) { return null; }
// The has() test is redundant here: the guard above already returned when the field is absent.
if (rootElement.has(fieldName) && rootElement.get(fieldName).isJsonNull()) { return null; }
if (rootElement.get(fieldName).isJsonArray()) {
if (!isValidJsonArray(rootElement, fieldName)) { return null; }
return getArrayValues(rootElement, fieldName);
} else {
String field = getStringValue(rootElement, fieldName);
// Arrays.asList yields a fixed-size list holding the single cleaned value.
return Arrays.asList(cleanField(field));
/**
 * Removes one pair of enclosing double quotes from {@code value} when it both starts
 * and ends with a double-quote character; otherwise returns {@code value} unchanged
 * (including when it is {@code null} or empty).
 *
 * NOTE(review): a value consisting of a single double-quote character passes both
 * charAt checks, and substring(1, 0) would then throw StringIndexOutOfBoundsException;
 * consider requiring a length of at least 2.
 */
private static String cleanField(String value) {
if (value != null && !value.isEmpty() && value.charAt(0) == '"' && value.charAt(value.length() - 1) == '"') {
value = value.substring(1, value.length() - 1);
return value;
/**
 * Reads the publication date of the work and, when it is not blank and
 * {@code addToDateOfAcceptance} is set, stores it as the publication's date of acceptance.
 * A "dnet:dataCite_date" qualifier is also built from {@code dictionaryKey}; the statement
 * that consumes it is only partially visible in this excerpt.
 *
 * NOTE(review): {@code jsonKey} is not used in the visible lines — the date is always
 * read from the hard-coded "publication_date" key. Confirm whether jsonKey was intended.
 */
private static void settingRelevantDate(final JsonObject rootElement,
final Publication publication,
final String jsonKey,
final String dictionaryKey,
final boolean addToDateOfAcceptance) {
final String pubDate = getPublicationDate(rootElement, "publication_date");
if (StringUtils.isNotBlank(pubDate)) {
if (addToDateOfAcceptance) {
publication.setDateofacceptance(mapStringField(pubDate, null));
Qualifier q = mapQualifier(dictionaryKey,dictionaryKey,"dnet:dataCite_date","dnet:dataCite_date");
// The head of this stream expression is missing from the excerpt.
.map(r -> {
return mapStructuredProperty(r, q, null);
/**
 * Assembles a "year-month-day" string from the "year", "month" and "day" members of the
 * JSON object stored under {@code jsonKey}.
 *
 * A missing day is replaced by "01"; a missing month makes the date "year-01-01".
 *
 * @return the assembled date when {@code isValidDate} accepts it; {@code null} when the
 *         object is absent, the year is blank, or the assembled string is rejected
 */
private static String getPublicationDate(final JsonObject rootElement,
final String jsonKey) {
final JsonObject pubDateJson = rootElement.getAsJsonObject(jsonKey);
if (pubDateJson == null) { return null; }
final String year = getStringValue(pubDateJson, "year");
final String month = getStringValue(pubDateJson, "month");
final String day = getStringValue(pubDateJson, "day");
if (StringUtils.isBlank(year)) { return null; }
String pubDate = "".concat(year);
// NOTE(review): month and day are appended as-is, without zero-padding; if isValidDate
// requires two-digit parts, single-digit values cause the whole date to be dropped.
if (StringUtils.isNotBlank(month)) {
pubDate = pubDate.concat("-" + month);
if (StringUtils.isNotBlank(day)) {
pubDate = pubDate.concat("-" + day);
} else {
pubDate += "-01";
} else {
pubDate += "-01-01";
if (isValidDate(pubDate)) { return pubDate; }
return null;
/**
 * Checks whether a work can be converted: its "type" must be a key of
 * {@code typologiesMapping} and its "titles" field must pass {@code isValidJsonArray}.
 *
 * NOTE(review): typologiesMapping is assigned in a static initializer that only logs on
 * failure, so it may still be null here and containsKey would then throw — confirm.
 */
protected static boolean isValid(final JsonObject rootElement/*, final Reporter context*/) {
final String type = getStringValue(rootElement, "type");
if (!typologiesMapping.containsKey(type)) {
// context.incrementCounter("filtered", "unknowntype_" + type, 1);
return false;
if (!isValidJsonArray(rootElement, "titles")) {
// context.incrementCounter("filtered", "invalid_title", 1);
return false;
return true;
/**
 * Validates the field {@code fieldName} of {@code rootElement}: returns {@code false}
 * when the field is absent or JSON null, or when it is an array whose first element is
 * JSON null.
 *
 * NOTE(review): no size check is visible before get(0); an empty array would make
 * get(0) throw instead of returning false — confirm and guard with size().
 */
private static boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) {
if (!rootElement.has(fieldName)) { return false; }
final JsonElement jsonElement = rootElement.get(fieldName);
if (jsonElement.isJsonNull()) { return false; }
if (jsonElement.isJsonArray()) {
final JsonArray jsonArray = jsonElement.getAsJsonArray();
if (jsonArray.isJsonNull()) { return false; }
if (jsonArray.get(0).isJsonNull()) { return false; }
return true;
private static Qualifier mapQualifier(String classId, String className, String schemeId, String schemeName) {
final Qualifier qualifier = new Qualifier();
return qualifier;
private static ExternalReference convertExtRef(String extId, String classId, String className, String schemeId, String schemeName) {
ExternalReference ex = new ExternalReference();
ex.setQualifier(mapQualifier(classId, className, schemeId, schemeName ));
return ex;
/**
 * Creates a {@link StructuredProperty} for {@code value}, or returns {@code null} when
 * the value is null or blank. The lines that populate the property from the arguments
 * are not visible in this excerpt.
 *
 * NOTE(review): the guard uses the non-short-circuit {@code |}, so isBlank is evaluated
 * even when value is null; mapStringField uses {@code ||} for the same check.
 */
private static StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) {
if (value == null | StringUtils.isBlank(value)) {
return null;
final StructuredProperty structuredProperty = new StructuredProperty();
return structuredProperty;
/**
 * Creates a {@code Field<String>} for {@code value}, or returns {@code null} when the
 * value is null or blank. Callers must therefore be prepared for a null result. The
 * lines that populate the field from the arguments are not visible in this excerpt.
 */
private static Field<String> mapStringField(String value, DataInfo dataInfo) {
if (value == null || StringUtils.isBlank(value)) {
return null;
final Field<String> stringField = new Field<>();
return stringField;
private static KeyValue createCollectedFrom() {
KeyValue cf = new KeyValue();
cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a");
return cf;
private static KeyValue createHostedBy() {
KeyValue hb = new KeyValue();
hb.setValue("Unknown Repository");
hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c");
return hb;
private static StructuredProperty mapAuthorId(String orcidId) {
final StructuredProperty sp = new StructuredProperty();
final Qualifier q = new Qualifier();
return sp;

View File

@ -0,0 +1,456 @@
package eu.dnetlib.doiboost.orcidnodoi.oaf;
import static eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility.*;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility;
import eu.dnetlib.doiboost.orcidnodoi.util.Pair;
public class PublicationToOaf {
static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class);
public static final String ORCID = "ORCID";
public final static String orcidPREFIX = "orcid_______";
public static final String OPENAIRE_PREFIX = "openaire____";
public static final String SEPARATOR = "::";
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {
put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
// json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname
private static Map<String, Pair<String, String>> externalIds = new HashMap<String, Pair<String, String>>() {
put("ark".toLowerCase(), new Pair<>("ark", "ark"));
put("arxiv".toLowerCase(), new Pair<>("arxiv", "arXiv"));
put("pmc".toLowerCase(), new Pair<>("pmc", "pmc"));
put("pmid".toLowerCase(), new Pair<>("pmid", "pmid"));
put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcidworkid"));
put("urn".toLowerCase(), new Pair<>("urn", "urn"));
static Map<String, Map<String, String>> typologiesMapping;
static {
try {
final String tt = IOUtils
typologiesMapping = new Gson().fromJson(tt, Map.class);
} catch (final Exception e) {
logger.error("loading typologies", e);
public static final String PID_TYPES = "dnet:pid_types";
public static Oaf generatePublicationActionsFromDump(final JsonObject rootElement) {
logger.debug("generatePublicationActionsFromDump ...");
if (!isValid(rootElement/* , context */)) {
logger.error("publication not valid");
return null;
Publication publication = new Publication();
final DataInfo dataInfo = new DataInfo();
publication.setLastupdatetimestamp(new Date().getTime());
// Adding external ids
.forEach(jsonExtId -> {
final String classid = externalIds.get(jsonExtId.toLowerCase()).getValue();
final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey();
final String extId = getStringValue(rootElement, jsonExtId);
if (StringUtils.isNotBlank(extId)) {
convertExtRef(extId, classid, classname, "dnet:pid_types", "dnet:pid_types"));
// Adding source
final String source = getStringValue(rootElement, "sourceName");
if (StringUtils.isNotBlank(source)) {
publication.setSource(Arrays.asList(mapStringField(source, null)));

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.

yes, there is a filter for null value:
JavaRDD oafPublicationRDD = enrichedWorksRDD
e -> {
return (Publication) publicationToOaf
.filter(p -> p != null);

yes, there is a filter for null value: JavaRDD<Publication> oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);
// Adding titles
final List<String> titles = createRepeatedField(rootElement, "titles");
if (titles == null || titles.isEmpty()) {
logger.error("titles not found");
// context.incrementCounter("filtered", "title_not_found", 1);
return null;

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.

yes, there is a filter for null value:
JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);

yes, there is a filter for null value: JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);
Qualifier q = mapQualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
.map(t -> {
return mapStructuredProperty(t, q, null);
// Adding identifier
final String id = getStringValue(rootElement, "id");
String sourceId = null;
if (id != null) {
sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(id.toLowerCase()));
} else {
String mergedTitle = titles.stream().map(Object::toString).collect(Collectors.joining(","));
sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(mergedTitle.toLowerCase()));
// Adding relevant date
settingRelevantDate(rootElement, publication, "publication_date", "issued", true);
// Adding collectedfrom
// Adding type
final String type = getStringValue(rootElement, "type");
String cobjValue = "";
if (StringUtils.isNotBlank(type)) {
publication.setResourcetype(mapQualifier(type, type, "dnet:dataCite_resource", "dnet:dataCite_resource"));
final String typeValue = typologiesMapping.get(type).get("value");
cobjValue = typologiesMapping.get(type).get("cobj");
final Instance instance = new Instance();
// Adding hostedby
// Adding url
final List<String> urls = createRepeatedField(rootElement, "urls");
if (urls != null && !urls.isEmpty()) {
final String pubDate = getPublicationDate(rootElement, "publication_date");
if (StringUtils.isNotBlank(pubDate)) {
instance.setDateofacceptance(mapStringField(pubDate, null));

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.

yes, there is a check on null value

yes, there is a check on null value
// Adding accessright
instance.setAccessright(mapQualifier("UNKNOWN", "UNKNOWN", "dnet:access_modes", "dnet:access_modes"));
// Adding type
mapQualifier(cobjValue, typeValue, "dnet:publication_resource", "dnet:publication_resource"));
} else {
logger.error("type not found");
// context.incrementCounter("filtered", "type_not_found", 1);
return null;
// Adding authors
final List<Author> authors = createAuthors(rootElement);
if (authors != null && authors.size() > 0) {
} else {
logger.error("authors not found");
// context.incrementCounter("filtered", "author_not_found", 1);
return null;
String classValue = getDefaultResulttype(cobjValue);
.setResulttype(mapQualifier(classValue, classValue, "dnet:result_typologies", "dnet:result_typologies"));
return publication;
public static List<Author> createAuthors(final JsonObject root) {
final String authorsJSONFieldName = "contributors";
if (root.has(authorsJSONFieldName) && root.get(authorsJSONFieldName).isJsonArray()) {
final List<Author> authors = new ArrayList<>();
final JsonArray jsonAuthors = root.getAsJsonArray(authorsJSONFieldName);
int firstCounter = 0;
int defaultCounter = 0;
int rank = 1;
int currentRank = 0;
for (final JsonElement item : jsonAuthors) {
final JsonObject jsonAuthor = item.getAsJsonObject();
final Author author = new Author();
if (item.isJsonObject()) {
final String creditname = getStringValue(jsonAuthor, "creditName");
final String surname = getStringValue(jsonAuthor, "surname");
final String name = getStringValue(jsonAuthor, "name");
final String oid = getStringValue(jsonAuthor, "oid");
final String seq = getStringValue(jsonAuthor, "sequence");
if (StringUtils.isNotBlank(seq)) {
if (seq.equals("first")) {
firstCounter += 1;
rank = firstCounter;
} else if (seq.equals("additional")) {
rank = currentRank + 1;
} else {
defaultCounter += 1;
rank = defaultCounter;
if (StringUtils.isNotBlank(oid)) {
author.setFullname(name + " " + surname);

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.

yes, there is a filter on null value:
JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);

yes, there is a filter on null value: JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);
if (StringUtils.isNotBlank(name)) {
if (StringUtils.isNotBlank(surname)) {
} else {
PacePerson p = new PacePerson(creditname, false);
if (p.isAccurate()) {

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.

yes, there is a filter on null value: JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);

yes, there is a filter on null value: JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { return (Publication) publicationToOaf .generatePublicationActionsFromJson(e._2()); }) .filter(p -> p != null);
} else {
currentRank = rank;
return authors;
return null;
/**
 * Reads {@code fieldName} from {@code rootElement} as a list of strings.
 *
 * Returns {@code null} when the field is absent, is JSON null, or is an array that
 * {@code isValidJsonArray} rejects. An array is converted with {@code getArrayValues};
 * any other value is read as a single string, passed through {@code cleanField} and
 * wrapped in a one-element list.
 */
private static List<String> createRepeatedField(final JsonObject rootElement, final String fieldName) {
if (!rootElement.has(fieldName)) {
return null;
// The has() test is redundant here: the guard above already returned when the field is absent.
if (rootElement.has(fieldName) && rootElement.get(fieldName).isJsonNull()) {
return null;
if (rootElement.get(fieldName).isJsonArray()) {
if (!isValidJsonArray(rootElement, fieldName)) {
return null;
return getArrayValues(rootElement, fieldName);
} else {
String field = getStringValue(rootElement, fieldName);
// Arrays.asList yields a fixed-size list holding the single cleaned value.
return Arrays.asList(cleanField(field));
/**
 * Removes one pair of enclosing double quotes from {@code value} when it both starts
 * and ends with a double-quote character; otherwise returns {@code value} unchanged
 * (including when it is {@code null} or empty).
 *
 * NOTE(review): a value consisting of a single double-quote character passes both
 * charAt checks, and substring(1, 0) would then throw StringIndexOutOfBoundsException;
 * consider requiring a length of at least 2.
 */
private static String cleanField(String value) {
if (value != null && !value.isEmpty() && value.charAt(0) == '"' && value.charAt(value.length() - 1) == '"') {
value = value.substring(1, value.length() - 1);
return value;
/**
 * Reads the publication date of the work and, when it is not blank and
 * {@code addToDateOfAcceptance} is set, stores it as the publication's date of acceptance.
 * A "dnet:dataCite_date" qualifier is also built from {@code dictionaryKey}; the statement
 * that consumes it is only partially visible in this excerpt.
 *
 * NOTE(review): {@code jsonKey} is not used in the visible lines — the date is always
 * read from the hard-coded "publication_date" key. Confirm whether jsonKey was intended.
 */
private static void settingRelevantDate(final JsonObject rootElement,
final Publication publication,
final String jsonKey,
final String dictionaryKey,
final boolean addToDateOfAcceptance) {
final String pubDate = getPublicationDate(rootElement, "publication_date");
if (StringUtils.isNotBlank(pubDate)) {
if (addToDateOfAcceptance) {
publication.setDateofacceptance(mapStringField(pubDate, null));
Qualifier q = mapQualifier(dictionaryKey, dictionaryKey, "dnet:dataCite_date", "dnet:dataCite_date");
// The head of this stream expression is missing from the excerpt.
.map(r -> {
return mapStructuredProperty(r, q, null);
private static String getPublicationDate(final JsonObject rootElement,
final String jsonKey) {
final JsonObject pubDateJson = rootElement.getAsJsonObject(jsonKey);
if (pubDateJson == null) {
return null;

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.

yes, there is a check on null value

yes, there is a check on null value
final String year = getStringValue(pubDateJson, "year");
final String month = getStringValue(pubDateJson, "month");
final String day = getStringValue(pubDateJson, "day");
if (StringUtils.isBlank(year)) {

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.

yes, there is a check on null value

yes, there is a check on null value
return null;
String pubDate = "".concat(year);

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.

yes, there is a check on null value

yes, there is a check on null value
if (StringUtils.isNotBlank(month)) {
pubDate = pubDate.concat("-" + month);
if (StringUtils.isNotBlank(day)) {
pubDate = pubDate.concat("-" + day);

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.

yes, there is a check on null value

yes, there is a check on null value
} else {
pubDate += "-01";
} else {
pubDate += "-01-01";
if (isValidDate(pubDate)) {
return pubDate;
return null;
/**
 * Checks whether a work can be converted: its "type" must be a key of
 * {@code typologiesMapping} (an error is logged otherwise) and its "titles" field must
 * pass {@code isValidJsonArray}.
 *
 * NOTE(review): typologiesMapping is assigned in a static initializer that only logs on
 * failure, so it may still be null here and containsKey would then throw — confirm.
 */
protected static boolean isValid(final JsonObject rootElement/* , final Reporter context */) {
final String type = getStringValue(rootElement, "type");
if (!typologiesMapping.containsKey(type)) {
logger.error("unknowntype_" + type);
// context.incrementCounter("filtered", "unknowntype_" + type, 1);
return false;
// Unlike the unknown-type case above, an invalid "titles" field is rejected without logging.
if (!isValidJsonArray(rootElement, "titles")) {
// context.incrementCounter("filtered", "invalid_title", 1);
return false;
return true;
/**
 * Validates the field {@code fieldName} of {@code rootElement}: returns {@code false}
 * when the field is absent or JSON null, or when it is an array whose first element is
 * JSON null.
 *
 * NOTE(review): no size check is visible before get(0); an empty array would make
 * get(0) throw instead of returning false — confirm and guard with size().
 */
private static boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) {
if (!rootElement.has(fieldName)) {
return false;
final JsonElement jsonElement = rootElement.get(fieldName);
if (jsonElement.isJsonNull()) {
return false;
if (jsonElement.isJsonArray()) {
final JsonArray jsonArray = jsonElement.getAsJsonArray();
if (jsonArray.isJsonNull()) {
return false;
if (jsonArray.get(0).isJsonNull()) {
return false;
return true;
private static Qualifier mapQualifier(String classId, String className, String schemeId, String schemeName) {
final Qualifier qualifier = new Qualifier();
return qualifier;

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.

yes, there is this check on the value: StringUtils.isNotBlank

yes, there is this check on the value: StringUtils.isNotBlank
private static ExternalReference convertExtRef(String extId, String classId, String className, String schemeId,
String schemeName) {
ExternalReference ex = new ExternalReference();
ex.setQualifier(mapQualifier(classId, className, schemeId, schemeName));
return ex;
private static StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) {
if (value == null | StringUtils.isBlank(value)) {
return null;
final StructuredProperty structuredProperty = new StructuredProperty();

Is the caller expecting the null? Otherwise this would likely produce a NPE.

Is the caller expecting the `null`? Otherwise this would likely produce a NPE.

yes, there is this check on the value: StringUtils.isNotBlank

yes, there is this check on the value: StringUtils.isNotBlank
return structuredProperty;
/**
 * Creates a {@code Field<String>} for {@code value}, or returns {@code null} when the
 * value is null or blank. Callers must therefore be prepared for a null result. The
 * lines that populate the field from the arguments are not visible in this excerpt.
 */
private static Field<String> mapStringField(String value, DataInfo dataInfo) {
if (value == null || StringUtils.isBlank(value)) {
return null;
final Field<String> stringField = new Field<>();
return stringField;
private static KeyValue createCollectedFrom() {
KeyValue cf = new KeyValue();
cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a");
return cf;
private static KeyValue createHostedBy() {
KeyValue hb = new KeyValue();
hb.setValue("Unknown Repository");
hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c");
return hb;
private static StructuredProperty mapAuthorId(String orcidId) {
final StructuredProperty sp = new StructuredProperty();
final Qualifier q = new Qualifier();
return sp;

View File

@ -1,107 +1,109 @@
package eu.dnetlib.doiboost.orcidnodoi.util;
import com.google.gson.JsonArray; package eu.dnetlib.doiboost.orcidnodoi.util;
import com.google.gson.JsonObject;
import org.apache.commons.lang3.StringUtils;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.*; import java.util.*;
import org.apache.commons.lang3.StringUtils;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
public class DumpToActionsUtility { public class DumpToActionsUtility {
private static final SimpleDateFormat ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US); private static final SimpleDateFormat ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US);
public static String getStringValue(final JsonObject root, final String key) { public static String getStringValue(final JsonObject root, final String key) {
if (root.has(key) && !root.get(key).isJsonNull()) if (root.has(key) && !root.get(key).isJsonNull())
return root.get(key).getAsString(); return root.get(key).getAsString();
return null; return null;
} }
public static List<String> getArrayValues(final JsonObject root, final String key) { public static List<String> getArrayValues(final JsonObject root, final String key) {
if (root.has(key) && root.get(key).isJsonArray()) { if (root.has(key) && root.get(key).isJsonArray()) {
final JsonArray asJsonArray = root.get(key).getAsJsonArray(); final JsonArray asJsonArray = root.get(key).getAsJsonArray();
final List<String> result = new ArrayList<>(); final List<String> result = new ArrayList<>();
asJsonArray.forEach(it -> {
if (StringUtils.isNotBlank(it.getAsString())) {
return result;
return new ArrayList<>();
asJsonArray.forEach(it -> { public static List<JsonObject> getArrayObjects(final JsonObject root, final String key) {
if (StringUtils.isNotBlank(it.getAsString())) { if (root.has(key) && root.get(key).isJsonArray()) {
result.add(it.getAsString()); final JsonArray asJsonArray = root.get(key).getAsJsonArray();
} final List<JsonObject> result = new ArrayList<>();
}); asJsonArray.forEach(it -> {
return result; if (it.getAsJsonObject() != null) {
} result.add(it.getAsJsonObject());
return new ArrayList<>(); }
} });
public static List<JsonObject> getArrayObjects(final JsonObject root, final String key) { return result;
if (root.has(key) && root.get(key).isJsonArray()) { }
final JsonArray asJsonArray = root.get(key).getAsJsonArray(); return new ArrayList<>();
final List<JsonObject> result = new ArrayList<>(); }
asJsonArray.forEach(it -> {
if (it.getAsJsonObject() != null) {
return result;
return new ArrayList<>();
public static boolean isValidDate(final String date) { public static boolean isValidDate(final String date) {
return date.matches("\\d{4}-\\d{2}-\\d{2}"); return date.matches("\\d{4}-\\d{2}-\\d{2}");
} }
public static String now_ISO8601() { // NOPMD public static String now_ISO8601() { // NOPMD
String result; String result;
synchronized (ISO8601FORMAT) { synchronized (ISO8601FORMAT) {
result = ISO8601FORMAT.format(new Date()); result = ISO8601FORMAT.format(new Date());
} }
//convert YYYYMMDDTHH:mm:ss+HH00 into YYYYMMDDTHH:mm:ss+HH:00 // convert YYYYMMDDTHH:mm:ss+HH00 into YYYYMMDDTHH:mm:ss+HH:00
//- note the added colon for the Timezone // - note the added colon for the Timezone
return result.substring(0, result.length() - 2) + ":" + result.substring(result.length() - 2); return result.substring(0, result.length() - 2) + ":" + result.substring(result.length() - 2);
} }
public static String getDefaultResulttype(final String cobjcategory) { public static String getDefaultResulttype(final String cobjcategory) {
switch (cobjcategory) { switch (cobjcategory) {
case "0029": case "0029":
return "software"; return "software";
case "0021": case "0021":
case "0024": case "0024":
case "0025": case "0025":
case "0030": case "0030":
return "dataset"; return "dataset";
case "0000": case "0000":
case "0010": case "0010":
case "0018": case "0018":
case "0020": case "0020":
case "0022": case "0022":
case "0023": case "0023":
case "0026": case "0026":
case "0027": case "0027":
case "0028": case "0028":
case "0037": case "0037":
return "other"; return "other";
case "0001": case "0001":
case "0002": case "0002":
case "0004": case "0004":
case "0005": case "0005":
case "0006": case "0006":
case "0007": case "0007":
case "0008": case "0008":
case "0009": case "0009":
case "0011": case "0011":
case "0012": case "0012":
case "0013": case "0013":
case "0014": case "0014":
case "0015": case "0015":
case "0016": case "0016":
case "0017": case "0017":
case "0019": case "0019":
case "0031": case "0031":
case "0032": case "0032":
return "publication"; return "publication";
default: default:
return "publication"; return "publication";
} }
} }
} }

View File

@ -1,30 +1,32 @@
package eu.dnetlib.doiboost.orcidnodoi.util; package eu.dnetlib.doiboost.orcidnodoi.util;
public class Pair<K, V> { public class Pair<K, V> {
private K k; private K k;
private V v; private V v;
public Pair(K k, V v) { public Pair(K k, V v) {
this.k = k; this.k = k;
this.v = v; this.v = v;
} }
public K getKey() { public K getKey() {
return k; return k;
} }
public V getValue() { public V getValue() {
return v; return v;
} }
@Override @Override
public boolean equals(Object obj) { public boolean equals(Object obj) {
if (obj instanceof Pair<?, ?>) { if (obj instanceof Pair<?, ?>) {
Pair<?, ?> tmp = (Pair<?, ?>) obj; Pair<?, ?> tmp = (Pair<?, ?>) obj;
return k.equals(tmp.getKey()) && v.equals(tmp.getValue()); return k.equals(tmp.getKey()) && v.equals(tmp.getValue());
} else return false; } else
} return false;
} }

View File

@ -54,7 +54,7 @@ public class OrcidClientTest {
} }
// @Test // @Test
public void testLambdaFileParser() throws Exception { private void testLambdaFileParser() throws Exception {
try (BufferedReader br = new BufferedReader( try (BufferedReader br = new BufferedReader(
new InputStreamReader(this.getClass().getResourceAsStream("last_modified.csv")))) { new InputStreamReader(this.getClass().getResourceAsStream("last_modified.csv")))) {
String line; String line;

View File

@ -0,0 +1,76 @@
package eu.dnetlib.doiboost.orcidnodoi;
import static org.junit.jupiter.api.Assertions.*;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.gson.JsonElement;
import com.google.gson.JsonParser;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf;
public class PublicationToOafTest {
private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class);
// @Ignore
public void convertOafPublicationTest() throws Exception {
String jsonPublication = IOUtils
JsonElement j = new JsonParser().parse(jsonPublication);
logger.info("json publication loaded: " + j.toString());
Publication oafPublication = (Publication) PublicationToOaf
assertEquals(oafPublication.getOriginalId().get(0), "60153327");
logger.info("oafPublication.getId(): " + oafPublication.getId());
"Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp");
assertTrue(oafPublication.getAuthor().size() == 7);
oafPublication.getAuthor().forEach(a -> {
logger.info("a.getFullname(): " + a.getFullname());
if (a.getName() != null) {
logger.info("a.getName(): " + a.getName());
if (a.getSurname() != null) {
logger.info("a.getSurname(): " + a.getSurname());
logger.info("a.getRank(): " + a.getRank());
if (a.getPid() != null) {
logger.info("a.getPid(): " + a.getPid().get(0).getValue());
if (oafPublication.getSource() != null) {
if (oafPublication.getExternalReference() != null) {
oafPublication.getExternalReference().forEach(e -> {
assertEquals(e.getQualifier().getSchemeid(), "dnet:pid_types");
oafPublication.getInstance().forEach(i -> {
logger.info("i.getInstancetype().getClassid(): " + i.getInstancetype().getClassid());
logger.info("i.getInstancetype().getClassname(): " + i.getInstancetype().getClassname());

View File

@ -95,7 +95,8 @@ public class OrcidNoDoiTest {
} }
@Test @Test
public void authorMatchTest() throws Exception { @Ignore
private void authorMatchTest() throws Exception {
logger.info("running authorSimpleMatchTest ...."); logger.info("running authorSimpleMatchTest ....");
String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml"; String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml";
AuthorData author = new AuthorData(); AuthorData author = new AuthorData();

View File

@ -0,0 +1 @@
{"oid":"0000-0002-4147-3387","id":"60153327","sourceName":"The Chinese University of Hong Kong","type":"conference-paper","titles":["Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp"],"extIds":[{"type":"wosuid","value":"000425015800225","relationShip":"self"},{"type":"other-id","value":"441f521e-ab19-448d-ba32-83157b348ada","relationShip":"self"}],"publicationDates":[],"contributors":[{"sequence":"1","oid":"0000-0002-4147-3387","name":"Elaine","surname":"Chow","creditName":"Elaine Chow"},{"sequence":"2","creditName":"Victor Tsui"},{"sequence":"3","creditName":"Achim Müller"},{"sequence":"4","creditName":"Vincy Lee"},{"sequence":"5","creditName":"Lucia Krivánekova"},{"sequence":"6","creditName":"Roland Krivánek"},{"sequence":"7","creditName":"Juliana CN Chan"}]}