From 1729cc5cf320c32cdafa2523d884d965ccefdc98 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 2 Jul 2020 18:46:20 +0200 Subject: [PATCH] publication conversion from json to oaf test --- .../orcidnodoi/oaf/OrcidWorkToOAF.java | 420 ---------------- .../orcidnodoi/oaf/PublicationToOaf.java | 456 ++++++++++++++++++ .../orcidnodoi/util/DumpToActionsUtility.java | 184 +++---- .../doiboost/orcidnodoi/util/Pair.java | 40 +- .../doiboost/orcid/OrcidClientTest.java | 2 +- .../orcidnodoi/PublicationToOafTest.java | 76 +++ .../orcidnodoi/xml/OrcidNoDoiTest.java | 3 +- .../doiboost/orcidnodoi/publication.json | 1 + 8 files changed, 650 insertions(+), 532 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/OrcidWorkToOAF.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java create mode 100644 dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/publication.json diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/OrcidWorkToOAF.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/OrcidWorkToOAF.java deleted file mode 100644 index 673abb407..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/OrcidWorkToOAF.java +++ /dev/null @@ -1,420 +0,0 @@ - -package eu.dnetlib.doiboost.orcidnodoi.oaf; - -import com.google.gson.Gson; -import com.google.gson.JsonArray; -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import eu.dnetlib.dhp.common.PacePerson; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.doiboost.orcidnodoi.SparkGenEnrichedOrcidWorks; -import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; -import eu.dnetlib.doiboost.orcidnodoi.util.Pair; -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.*; -import java.util.stream.Collectors; - -import static eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility.*; - -public class OrcidWorkToOAF { - - static Logger logger = LoggerFactory.getLogger(OrcidWorkToOAF.class); - - public static final String ORCID = "ORCID"; - public final static String orcidPREFIX = "orcid_______"; - public static final String OPENAIRE_PREFIX = "openaire____"; - public static final String SEPARATOR = "::"; - - private static Map> datasources = new HashMap>() { - - { - put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid")); - - } - }; - - // json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname - private static Map> externalIds = new HashMap>() { - - { - put("ark".toLowerCase(), new Pair<>("ark", "ark")); - put("arxiv".toLowerCase(), new Pair<>("arxiv", "arXiv")); - put("pmc".toLowerCase(), new Pair<>("pmc", "pmc")); - put("pmid".toLowerCase(), new Pair<>("pmid", "pmid")); - put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcidworkid")); - put("urn".toLowerCase(), new Pair<>("urn", "urn")); - } - }; - - static Map> typologiesMapping; - - static { - try { - final String tt = IOUtils.toString(OrcidWorkToOAF.class.getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json")); - typologiesMapping = new Gson().fromJson(tt, Map.class); - } catch (final Exception e) { - logger.error("loading typologies", e); - } - } - - public static final String PID_TYPES = "dnet:pid_types"; - - public static Oaf generatePublicationActionsFromDump(final JsonObject rootElement, final String setName) { - - if (!isValid(rootElement/*, context*/)) { return null; } - - Publication publication = new Publication(); - - final DataInfo dataInfo = new DataInfo(); - dataInfo.setDeletedbyinference(false); - dataInfo.setInferred(false); - dataInfo.setTrust("0.9"); - dataInfo.setProvenanceaction( - mapQualifier( - "sysimport:actionset:orcidworks-no-doi", - "sysimport:actionset:orcidworks-no-doi", - "dnet:provenanceActions", - "dnet:provenanceActions")); - publication.setDataInfo(dataInfo); - - publication.setLastupdatetimestamp(new Date().getTime()); - - publication.setDateofcollection("2019-10-22"); - publication.setDateoftransformation(DumpToActionsUtility.now_ISO8601()); - - // Adding external ids - externalIds.keySet().stream() - .forEach(jsonExtId -> { - final String classid = externalIds.get(jsonExtId.toLowerCase()).getValue(); - final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey(); - final String extId = getStringValue(rootElement, jsonExtId); - if (StringUtils.isNotBlank(extId)) { - publication.getExternalReference().add( - convertExtRef(extId, classid, classname, "dnet:pid_types", "dnet:pid_types")); - } - }); - - // Adding source -// final String source = getStringValue(rootElement, "source"); -// if (StringUtils.isNotBlank(source)) { -// metadata.addSource(StringField.newBuilder().setValue(source).build()); -// } - - // Adding titles - final List titles = createRepeatedField(rootElement, "titles"); - if (titles==null || titles.isEmpty()) { -// context.incrementCounter("filtered", "title_not_found", 1); - return null; - } - Qualifier q = mapQualifier("main title","main title","dnet:dataCite_title","dnet:dataCite_title"); - publication.setTitle( - titles - .stream() - .map(t -> { - return mapStructuredProperty(t, q, null); - }) - .collect(Collectors.toList())); - // Adding identifier - final String id = getStringValue(rootElement, "id"); - String sourceId = null; - if (id != null) { - publication.setOriginalId(Arrays.asList(id)); - sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(id.toLowerCase())); - } else { - String mergedTitle = titles.stream().map(Object::toString).collect(Collectors.joining(",")); - sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(mergedTitle.toLowerCase())); - } - publication.setId(sourceId); - - // Adding relevant date - settingRelevantDate(rootElement, publication, "publication_date", "issued", true); - - // Adding collectedfrom - publication.setCollectedfrom(Arrays.asList(createCollectedFrom())); - - // Adding type - final String type = getStringValue(rootElement, "type"); - String cobjValue = ""; - if (StringUtils.isNotBlank(type)) { - publication.setResourcetype(mapQualifier(type, type, "dnet:dataCite_resource", "dnet:dataCite_resource")); - - final String typeValue = typologiesMapping.get(type).get("value"); - cobjValue = typologiesMapping.get(type).get("cobj"); - final Instance instance = new Instance(); - - // Adding hostedby - instance.setHostedby(createHostedBy()); - - // Adding url - final List urls = createRepeatedField(rootElement, "urls"); - if (urls!=null && !urls.isEmpty()) { - instance.setUrl(urls); - } - - final String pubDate = getPublicationDate(rootElement, "publication_date"); - if (StringUtils.isNotBlank(pubDate)) { - instance.setDateofacceptance(mapStringField(pubDate, null)); - } - - instance.setCollectedfrom(createCollectedFrom()); - - // Adding accessright - instance.setAccessright(mapQualifier("UNKNOWN", "UNKNOWN", "dnet:access_modes", "dnet:access_modes")); - - // Adding type - instance.setInstancetype(mapQualifier(cobjValue, typeValue, "dnet:publication_resource", "dnet:publication_resource")); - - publication.setInstance(Arrays.asList(instance)); - } else { -// context.incrementCounter("filtered", "type_not_found", 1); - return null; - } - - // Adding authors - final List authors = createAuthors(rootElement); - if (authors != null && authors.size() > 0) { - publication.setAuthor(authors); - } else { -// context.incrementCounter("filtered", "author_not_found", 1); - return null; - } - String classValue = getDefaultResulttype(cobjValue); - publication.setResulttype(mapQualifier(classValue, classValue,"dnet:result_typologies", "dnet:result_typologies")); - return publication; - } - - public static List createAuthors(final JsonObject root) { - - final String authorsJSONFieldName = "authors"; - - if (root.has(authorsJSONFieldName) && root.get(authorsJSONFieldName).isJsonArray()) { - - final List authors = new ArrayList<>(); - final JsonArray jsonAuthors = root.getAsJsonArray(authorsJSONFieldName); - int firstCounter = 0; - int defaultCounter = 0; - int rank = 1; - int currentRank = 0; - - for (final JsonElement item : jsonAuthors) { - final JsonObject jsonAuthor = item.getAsJsonObject(); - final Author author = new Author(); - if (item.isJsonObject()) { - final String surname = getStringValue(jsonAuthor, "surname"); - final String name = getStringValue(jsonAuthor, "name"); - final String oid = getStringValue(jsonAuthor, "oid"); - final String seq = getStringValue(jsonAuthor, "seq"); - if (StringUtils.isNotBlank(seq)) { - if (seq.equals("first")) { - firstCounter += 1; - rank = firstCounter; - - } else if (seq.equals("additional")) { - rank = currentRank + 1; - } else { - defaultCounter += 1; - rank = defaultCounter; - } - } - - if (StringUtils.isNotBlank(oid)) { - author.setPid(Arrays.asList(mapAuthorId(oid))); - author.setFullname(name + " " + surname); - if (StringUtils.isNotBlank(name)) { - author.setName(name); - } - if (StringUtils.isNotBlank(surname)) { - author.setSurname(surname); - } - } else { - String fullname = ""; - if (StringUtils.isNotBlank(name)) { - fullname = name; - } else { - if (StringUtils.isNotBlank(surname)) { - fullname = surname; - } - } - PacePerson p = new PacePerson(fullname, false); - if (p.isAccurate()) { - author.setName(p.getNormalisedFirstName()); - author.setSurname(p.getNormalisedSurname()); - author.setFullname(p.getNormalisedFullname()); - } - else { - author.setFullname(fullname); - } - } - } - author.setRank(rank); - authors.add(author); - currentRank = rank; - } - return authors; - - } - return null; - } - - private static List createRepeatedField(final JsonObject rootElement, final String fieldName) { - if (!rootElement.has(fieldName)) { return null; } - if (rootElement.has(fieldName) && rootElement.get(fieldName).isJsonNull()) { return null; } - if (rootElement.get(fieldName).isJsonArray()) { - if (!isValidJsonArray(rootElement, fieldName)) { return null; } - return getArrayValues(rootElement, fieldName); - } else { - String field = getStringValue(rootElement, fieldName); - return Arrays.asList(cleanField(field)); - } - } - - private static String cleanField(String value) { - if (value != null && !value.isEmpty() && value.charAt(0) == '"' && value.charAt(value.length() - 1) == '"') { - value = value.substring(1, value.length() - 1); - } - return value; - } - - private static void settingRelevantDate(final JsonObject rootElement, - final Publication publication, - final String jsonKey, - final String dictionaryKey, - final boolean addToDateOfAcceptance) { - - final String pubDate = getPublicationDate(rootElement, "publication_date"); - if (StringUtils.isNotBlank(pubDate)) { - if (addToDateOfAcceptance) { - publication.setDateofacceptance(mapStringField(pubDate, null)); - } - Qualifier q = mapQualifier(dictionaryKey,dictionaryKey,"dnet:dataCite_date","dnet:dataCite_date"); - publication.setRelevantdate( - Arrays.asList(pubDate) - .stream() - .map(r -> { - return mapStructuredProperty(r, q, null); - }) - .collect(Collectors.toList())); - } - } - - private static String getPublicationDate(final JsonObject rootElement, - final String jsonKey) { - - final JsonObject pubDateJson = rootElement.getAsJsonObject(jsonKey); - if (pubDateJson == null) { return null; } - final String year = getStringValue(pubDateJson, "year"); - final String month = getStringValue(pubDateJson, "month"); - final String day = getStringValue(pubDateJson, "day"); - - if (StringUtils.isBlank(year)) { return null; } - String pubDate = "".concat(year); - if (StringUtils.isNotBlank(month)) { - pubDate = pubDate.concat("-" + month); - if (StringUtils.isNotBlank(day)) { - pubDate = pubDate.concat("-" + day); - } else { - pubDate += "-01"; - } - } else { - pubDate += "-01-01"; - } - if (isValidDate(pubDate)) { return pubDate; } - return null; - } - - protected static boolean isValid(final JsonObject rootElement/*, final Reporter context*/) { - - final String type = getStringValue(rootElement, "type"); - if (!typologiesMapping.containsKey(type)) { -// context.incrementCounter("filtered", "unknowntype_" + type, 1); - return false; - } - - if (!isValidJsonArray(rootElement, "titles")) { -// context.incrementCounter("filtered", "invalid_title", 1); - return false; - } - return true; - } - - private static boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) { - if (!rootElement.has(fieldName)) { return false; } - final JsonElement jsonElement = rootElement.get(fieldName); - if (jsonElement.isJsonNull()) { return false; } - if (jsonElement.isJsonArray()) { - final JsonArray jsonArray = jsonElement.getAsJsonArray(); - if (jsonArray.isJsonNull()) { return false; } - if (jsonArray.get(0).isJsonNull()) { return false; } - } - return true; - } - - private static Qualifier mapQualifier(String classId, String className, String schemeId, String schemeName) { - final Qualifier qualifier = new Qualifier(); - qualifier.setClassid(classId); - qualifier.setClassname(className); - qualifier.setSchemeid(schemeId); - qualifier.setSchemename(schemeName); - return qualifier; - } - - private static ExternalReference convertExtRef(String extId, String classId, String className, String schemeId, String schemeName) { - ExternalReference ex = new ExternalReference(); - ex.setRefidentifier(extId); - ex.setQualifier(mapQualifier(classId, className, schemeId, schemeName )); - return ex; - } - - private static StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) { - if (value == null | StringUtils.isBlank(value)) { - return null; - } - - final StructuredProperty structuredProperty = new StructuredProperty(); - structuredProperty.setValue(value); - structuredProperty.setQualifier(qualifier); - structuredProperty.setDataInfo(dataInfo); - return structuredProperty; - } - - private static Field mapStringField(String value, DataInfo dataInfo) { - if (value == null || StringUtils.isBlank(value)) { - return null; - } - - final Field stringField = new Field<>(); - stringField.setValue(value); - stringField.setDataInfo(dataInfo); - return stringField; - } - - private static KeyValue createCollectedFrom() { - KeyValue cf = new KeyValue(); - cf.setValue(ORCID); - cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a"); - return cf; - } - - private static KeyValue createHostedBy() { - KeyValue hb = new KeyValue(); - hb.setValue("Unknown Repository"); - hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c"); - return hb; - } - - private static StructuredProperty mapAuthorId(String orcidId) { - final StructuredProperty sp = new StructuredProperty(); - sp.setValue(orcidId); - final Qualifier q = new Qualifier(); - q.setClassid("ORCID"); - q.setClassname("ORCID"); - sp.setQualifier(q); - return sp; - } -} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java new file mode 100644 index 000000000..dc03767ec --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -0,0 +1,456 @@ + +package eu.dnetlib.doiboost.orcidnodoi.oaf; + +import static eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility.*; + +import java.util.*; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.gson.Gson; +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; + +import eu.dnetlib.dhp.common.PacePerson; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.utils.DHPUtils; +import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; +import eu.dnetlib.doiboost.orcidnodoi.util.Pair; + +public class PublicationToOaf { + + static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class); + + public static final String ORCID = "ORCID"; + public final static String orcidPREFIX = "orcid_______"; + public static final String OPENAIRE_PREFIX = "openaire____"; + public static final String SEPARATOR = "::"; + + private static Map> datasources = new HashMap>() { + + { + put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid")); + + } + }; + + // json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname + private static Map> externalIds = new HashMap>() { + + { + put("ark".toLowerCase(), new Pair<>("ark", "ark")); + put("arxiv".toLowerCase(), new Pair<>("arxiv", "arXiv")); + put("pmc".toLowerCase(), new Pair<>("pmc", "pmc")); + put("pmid".toLowerCase(), new Pair<>("pmid", "pmid")); + put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcidworkid")); + put("urn".toLowerCase(), new Pair<>("urn", "urn")); + } + }; + + static Map> typologiesMapping; + + static { + try { + final String tt = IOUtils + .toString( + PublicationToOaf.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json")); + typologiesMapping = new Gson().fromJson(tt, Map.class); + } catch (final Exception e) { + logger.error("loading typologies", e); + } + } + + public static final String PID_TYPES = "dnet:pid_types"; + + public static Oaf generatePublicationActionsFromDump(final JsonObject rootElement) { + + logger.debug("generatePublicationActionsFromDump ..."); + if (!isValid(rootElement/* , context */)) { + logger.error("publication not valid"); + return null; + } + + Publication publication = new Publication(); + + final DataInfo dataInfo = new DataInfo(); + dataInfo.setDeletedbyinference(false); + dataInfo.setInferred(false); + dataInfo.setTrust("0.9"); + dataInfo + .setProvenanceaction( + mapQualifier( + "sysimport:actionset:orcidworks-no-doi", + "sysimport:actionset:orcidworks-no-doi", + "dnet:provenanceActions", + "dnet:provenanceActions")); + publication.setDataInfo(dataInfo); + + publication.setLastupdatetimestamp(new Date().getTime()); + + publication.setDateofcollection("2019-10-22"); + publication.setDateoftransformation(DumpToActionsUtility.now_ISO8601()); + + // Adding external ids + externalIds + .keySet() + .stream() + .forEach(jsonExtId -> { + final String classid = externalIds.get(jsonExtId.toLowerCase()).getValue(); + final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey(); + final String extId = getStringValue(rootElement, jsonExtId); + if (StringUtils.isNotBlank(extId)) { + publication + .getExternalReference() + .add( + convertExtRef(extId, classid, classname, "dnet:pid_types", "dnet:pid_types")); + } + }); + + // Adding source + final String source = getStringValue(rootElement, "sourceName"); + if (StringUtils.isNotBlank(source)) { + publication.setSource(Arrays.asList(mapStringField(source, null))); + } + + // Adding titles + final List titles = createRepeatedField(rootElement, "titles"); + if (titles == null || titles.isEmpty()) { + logger.error("titles not found"); +// context.incrementCounter("filtered", "title_not_found", 1); + return null; + } + Qualifier q = mapQualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); + publication + .setTitle( + titles + .stream() + .map(t -> { + return mapStructuredProperty(t, q, null); + }) + .collect(Collectors.toList())); + // Adding identifier + final String id = getStringValue(rootElement, "id"); + String sourceId = null; + if (id != null) { + publication.setOriginalId(Arrays.asList(id)); + sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(id.toLowerCase())); + } else { + String mergedTitle = titles.stream().map(Object::toString).collect(Collectors.joining(",")); + sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(mergedTitle.toLowerCase())); + } + publication.setId(sourceId); + + // Adding relevant date + settingRelevantDate(rootElement, publication, "publication_date", "issued", true); + + // Adding collectedfrom + publication.setCollectedfrom(Arrays.asList(createCollectedFrom())); + + // Adding type + final String type = getStringValue(rootElement, "type"); + String cobjValue = ""; + if (StringUtils.isNotBlank(type)) { + publication.setResourcetype(mapQualifier(type, type, "dnet:dataCite_resource", "dnet:dataCite_resource")); + + final String typeValue = typologiesMapping.get(type).get("value"); + cobjValue = typologiesMapping.get(type).get("cobj"); + final Instance instance = new Instance(); + + // Adding hostedby + instance.setHostedby(createHostedBy()); + + // Adding url + final List urls = createRepeatedField(rootElement, "urls"); + if (urls != null && !urls.isEmpty()) { + instance.setUrl(urls); + } + + final String pubDate = getPublicationDate(rootElement, "publication_date"); + if (StringUtils.isNotBlank(pubDate)) { + instance.setDateofacceptance(mapStringField(pubDate, null)); + } + + instance.setCollectedfrom(createCollectedFrom()); + + // Adding accessright + instance.setAccessright(mapQualifier("UNKNOWN", "UNKNOWN", "dnet:access_modes", "dnet:access_modes")); + + // Adding type + instance + .setInstancetype( + mapQualifier(cobjValue, typeValue, "dnet:publication_resource", "dnet:publication_resource")); + + publication.setInstance(Arrays.asList(instance)); + } else { + logger.error("type not found"); +// context.incrementCounter("filtered", "type_not_found", 1); + return null; + } + + // Adding authors + final List authors = createAuthors(rootElement); + if (authors != null && authors.size() > 0) { + publication.setAuthor(authors); + } else { + logger.error("authors not found"); +// context.incrementCounter("filtered", "author_not_found", 1); + return null; + } + String classValue = getDefaultResulttype(cobjValue); + publication + .setResulttype(mapQualifier(classValue, classValue, "dnet:result_typologies", "dnet:result_typologies")); + return publication; + } + + public static List createAuthors(final JsonObject root) { + + final String authorsJSONFieldName = "contributors"; + + if (root.has(authorsJSONFieldName) && root.get(authorsJSONFieldName).isJsonArray()) { + + final List authors = new ArrayList<>(); + final JsonArray jsonAuthors = root.getAsJsonArray(authorsJSONFieldName); + int firstCounter = 0; + int defaultCounter = 0; + int rank = 1; + int currentRank = 0; + + for (final JsonElement item : jsonAuthors) { + final JsonObject jsonAuthor = item.getAsJsonObject(); + final Author author = new Author(); + if (item.isJsonObject()) { + final String creditname = getStringValue(jsonAuthor, "creditName"); + final String surname = getStringValue(jsonAuthor, "surname"); + final String name = getStringValue(jsonAuthor, "name"); + final String oid = getStringValue(jsonAuthor, "oid"); + final String seq = getStringValue(jsonAuthor, "sequence"); + if (StringUtils.isNotBlank(seq)) { + if (seq.equals("first")) { + firstCounter += 1; + rank = firstCounter; + + } else if (seq.equals("additional")) { + rank = currentRank + 1; + } else { + defaultCounter += 1; + rank = defaultCounter; + } + } + if (StringUtils.isNotBlank(oid)) { + author.setPid(Arrays.asList(mapAuthorId(oid))); + author.setFullname(name + " " + surname); + if (StringUtils.isNotBlank(name)) { + author.setName(name); + } + if (StringUtils.isNotBlank(surname)) { + author.setSurname(surname); + } + } else { + PacePerson p = new PacePerson(creditname, false); + if (p.isAccurate()) { + author.setName(p.getNormalisedFirstName()); + author.setSurname(p.getNormalisedSurname()); + author.setFullname(p.getNormalisedFullname()); + } else { + author.setFullname(creditname); + } + } + } + author.setRank(rank); + authors.add(author); + currentRank = rank; + } + return authors; + + } + return null; + } + + private static List createRepeatedField(final JsonObject rootElement, final String fieldName) { + if (!rootElement.has(fieldName)) { + return null; + } + if (rootElement.has(fieldName) && rootElement.get(fieldName).isJsonNull()) { + return null; + } + if (rootElement.get(fieldName).isJsonArray()) { + if (!isValidJsonArray(rootElement, fieldName)) { + return null; + } + return getArrayValues(rootElement, fieldName); + } else { + String field = getStringValue(rootElement, fieldName); + return Arrays.asList(cleanField(field)); + } + } + + private static String cleanField(String value) { + if (value != null && !value.isEmpty() && value.charAt(0) == '"' && value.charAt(value.length() - 1) == '"') { + value = value.substring(1, value.length() - 1); + } + return value; + } + + private static void settingRelevantDate(final JsonObject rootElement, + final Publication publication, + final String jsonKey, + final String dictionaryKey, + final boolean addToDateOfAcceptance) { + + final String pubDate = getPublicationDate(rootElement, "publication_date"); + if (StringUtils.isNotBlank(pubDate)) { + if (addToDateOfAcceptance) { + publication.setDateofacceptance(mapStringField(pubDate, null)); + } + Qualifier q = mapQualifier(dictionaryKey, dictionaryKey, "dnet:dataCite_date", "dnet:dataCite_date"); + publication + .setRelevantdate( + Arrays + .asList(pubDate) + .stream() + .map(r -> { + return mapStructuredProperty(r, q, null); + }) + .collect(Collectors.toList())); + } + } + + private static String getPublicationDate(final JsonObject rootElement, + final String jsonKey) { + + final JsonObject pubDateJson = rootElement.getAsJsonObject(jsonKey); + if (pubDateJson == null) { + return null; + } + final String year = getStringValue(pubDateJson, "year"); + final String month = getStringValue(pubDateJson, "month"); + final String day = getStringValue(pubDateJson, "day"); + + if (StringUtils.isBlank(year)) { + return null; + } + String pubDate = "".concat(year); + if (StringUtils.isNotBlank(month)) { + pubDate = pubDate.concat("-" + month); + if (StringUtils.isNotBlank(day)) { + pubDate = pubDate.concat("-" + day); + } else { + pubDate += "-01"; + } + } else { + pubDate += "-01-01"; + } + if (isValidDate(pubDate)) { + return pubDate; + } + return null; + } + + protected static boolean isValid(final JsonObject rootElement/* , final Reporter context */) { + + final String type = getStringValue(rootElement, "type"); + if (!typologiesMapping.containsKey(type)) { + logger.error("unknowntype_" + type); +// context.incrementCounter("filtered", "unknowntype_" + type, 1); + return false; + } + + if (!isValidJsonArray(rootElement, "titles")) { + logger.error("invalid_title"); +// context.incrementCounter("filtered", "invalid_title", 1); + return false; + } + return true; + } + + private static boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) { + if (!rootElement.has(fieldName)) { + return false; + } + final JsonElement jsonElement = rootElement.get(fieldName); + if (jsonElement.isJsonNull()) { + return false; + } + if (jsonElement.isJsonArray()) { + final JsonArray jsonArray = jsonElement.getAsJsonArray(); + if (jsonArray.isJsonNull()) { + return false; + } + if (jsonArray.get(0).isJsonNull()) { + return false; + } + } + return true; + } + + private static Qualifier mapQualifier(String classId, String className, String schemeId, String schemeName) { + final Qualifier qualifier = new Qualifier(); + qualifier.setClassid(classId); + qualifier.setClassname(className); + qualifier.setSchemeid(schemeId); + qualifier.setSchemename(schemeName); + return qualifier; + } + + private static ExternalReference convertExtRef(String extId, String classId, String className, String schemeId, + String schemeName) { + ExternalReference ex = new ExternalReference(); + ex.setRefidentifier(extId); + ex.setQualifier(mapQualifier(classId, className, schemeId, schemeName)); + return ex; + } + + private static StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) { + if (value == null | StringUtils.isBlank(value)) { + return null; + } + + final StructuredProperty structuredProperty = new StructuredProperty(); + structuredProperty.setValue(value); + structuredProperty.setQualifier(qualifier); + structuredProperty.setDataInfo(dataInfo); + return structuredProperty; + } + + private static Field mapStringField(String value, DataInfo dataInfo) { + if (value == null || StringUtils.isBlank(value)) { + return null; + } + + final Field stringField = new Field<>(); + stringField.setValue(value); + stringField.setDataInfo(dataInfo); + return stringField; + } + + private static KeyValue createCollectedFrom() { + KeyValue cf = new KeyValue(); + cf.setValue(ORCID); + cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a"); + return cf; + } + + private static KeyValue createHostedBy() { + KeyValue hb = new KeyValue(); + hb.setValue("Unknown Repository"); + hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c"); + return hb; + } + + private static StructuredProperty mapAuthorId(String orcidId) { + final StructuredProperty sp = new StructuredProperty(); + sp.setValue(orcidId); + final Qualifier q = new Qualifier(); + q.setClassid("ORCID"); + q.setClassname("ORCID"); + sp.setQualifier(q); + return sp; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java index c460f6299..9b9f3c8b2 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java @@ -1,107 +1,109 @@ -package eu.dnetlib.doiboost.orcidnodoi.util; -import com.google.gson.JsonArray; -import com.google.gson.JsonObject; -import org.apache.commons.lang3.StringUtils; +package eu.dnetlib.doiboost.orcidnodoi.util; import java.text.SimpleDateFormat; import java.util.*; +import org.apache.commons.lang3.StringUtils; + +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; + public class DumpToActionsUtility { - private static final SimpleDateFormat ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US); + private static final SimpleDateFormat ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US); - public static String getStringValue(final JsonObject root, final String key) { - if (root.has(key) && !root.get(key).isJsonNull()) - return root.get(key).getAsString(); - return null; - } + public static String getStringValue(final JsonObject root, final String key) { + if (root.has(key) && !root.get(key).isJsonNull()) + return root.get(key).getAsString(); + return null; + } - public static List getArrayValues(final JsonObject root, final String key) { - if (root.has(key) && root.get(key).isJsonArray()) { - final JsonArray asJsonArray = root.get(key).getAsJsonArray(); - final List result = new ArrayList<>(); + public static List getArrayValues(final JsonObject root, final String key) { + if (root.has(key) && root.get(key).isJsonArray()) { + final JsonArray asJsonArray = root.get(key).getAsJsonArray(); + final List result = new ArrayList<>(); + asJsonArray.forEach(it -> { + if (StringUtils.isNotBlank(it.getAsString())) { + result.add(it.getAsString()); + } + }); + return result; + } + return new ArrayList<>(); + } - asJsonArray.forEach(it -> { - if (StringUtils.isNotBlank(it.getAsString())) { - result.add(it.getAsString()); - } - }); - return result; - } - return new ArrayList<>(); - } - public static List getArrayObjects(final JsonObject root, final String key) { - if (root.has(key) && root.get(key).isJsonArray()) { - final JsonArray asJsonArray = root.get(key).getAsJsonArray(); - final List result = new ArrayList<>(); - asJsonArray.forEach(it -> { - if (it.getAsJsonObject() != null) { - result.add(it.getAsJsonObject()); - } - }); - return result; - } - return new ArrayList<>(); - } + public static List getArrayObjects(final JsonObject root, final String key) { + if (root.has(key) && root.get(key).isJsonArray()) { + final JsonArray asJsonArray = root.get(key).getAsJsonArray(); + final List result = new ArrayList<>(); + asJsonArray.forEach(it -> { + if (it.getAsJsonObject() != null) { + result.add(it.getAsJsonObject()); + } + }); + return result; + } + return new ArrayList<>(); + } - public static boolean isValidDate(final String date) { - return date.matches("\\d{4}-\\d{2}-\\d{2}"); - } + public static boolean isValidDate(final String date) { + return date.matches("\\d{4}-\\d{2}-\\d{2}"); + } - public static String now_ISO8601() { // NOPMD - String result; - synchronized (ISO8601FORMAT) { - result = ISO8601FORMAT.format(new Date()); - } - //convert YYYYMMDDTHH:mm:ss+HH00 into YYYYMMDDTHH:mm:ss+HH:00 - //- note the added colon for the Timezone - return result.substring(0, result.length() - 2) + ":" + result.substring(result.length() - 2); - } + public static String now_ISO8601() { // NOPMD + String result; + synchronized (ISO8601FORMAT) { + result = ISO8601FORMAT.format(new Date()); + } + // convert YYYYMMDDTHH:mm:ss+HH00 into YYYYMMDDTHH:mm:ss+HH:00 + // - note the added colon for the Timezone + return result.substring(0, result.length() - 2) + ":" + result.substring(result.length() - 2); + } - public static String getDefaultResulttype(final String cobjcategory) { - switch (cobjcategory) { - case "0029": - return "software"; - case "0021": - case "0024": - case "0025": - case "0030": - return "dataset"; - case "0000": - case "0010": - case "0018": - case "0020": - case "0022": - case "0023": - case "0026": - case "0027": - case "0028": - case "0037": - return "other"; - case "0001": - case "0002": - case "0004": - case "0005": - case "0006": - case "0007": - case "0008": - case "0009": - case "0011": - case "0012": - case "0013": - case "0014": - case "0015": - case "0016": - case "0017": - case "0019": - case "0031": - case "0032": - return "publication"; - default: - return "publication"; - } - } + public static String getDefaultResulttype(final String cobjcategory) { + switch (cobjcategory) { + case "0029": + return "software"; + case "0021": + case "0024": + case "0025": + case "0030": + return "dataset"; + case "0000": + case "0010": + case "0018": + case "0020": + case "0022": + case "0023": + case "0026": + case "0027": + case "0028": + case "0037": + return "other"; + case "0001": + case "0002": + case "0004": + case "0005": + case "0006": + case "0007": + case "0008": + case "0009": + case "0011": + case "0012": + case "0013": + case "0014": + case "0015": + case "0016": + case "0017": + case "0019": + case "0031": + case "0032": + return "publication"; + default: + return "publication"; + } + } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/Pair.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/Pair.java index 58c09af60..8883d00f5 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/Pair.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/Pair.java @@ -1,30 +1,32 @@ + package eu.dnetlib.doiboost.orcidnodoi.util; public class Pair { - private K k; + private K k; - private V v; + private V v; - public Pair(K k, V v) { - this.k = k; - this.v = v; - } + public Pair(K k, V v) { + this.k = k; + this.v = v; + } - public K getKey() { - return k; - } + public K getKey() { + return k; + } - public V getValue() { - return v; - } + public V getValue() { + return v; + } - @Override - public boolean equals(Object obj) { - if (obj instanceof Pair) { - Pair tmp = (Pair) obj; - return k.equals(tmp.getKey()) && v.equals(tmp.getValue()); - } else return false; - } + @Override + public boolean equals(Object obj) { + if (obj instanceof Pair) { + Pair tmp = (Pair) obj; + return k.equals(tmp.getKey()) && v.equals(tmp.getValue()); + } else + return false; + } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 75f857ca4..8b50f2d8f 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -54,7 +54,7 @@ public class OrcidClientTest { } // @Test - public void testLambdaFileParser() throws Exception { + private void testLambdaFileParser() throws Exception { try (BufferedReader br = new BufferedReader( new InputStreamReader(this.getClass().getResourceAsStream("last_modified.csv")))) { String line; diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java new file mode 100644 index 000000000..4d04e1a16 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java @@ -0,0 +1,76 @@ + +package eu.dnetlib.doiboost.orcidnodoi; + +import static org.junit.jupiter.api.Assertions.*; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.gson.JsonElement; +import com.google.gson.JsonParser; + +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; + +public class PublicationToOafTest { + + private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class); + + @Test +// @Ignore + public void convertOafPublicationTest() throws Exception { + String jsonPublication = IOUtils + .toString( + PublicationToOafTest.class.getResourceAsStream("publication.json")); + JsonElement j = new JsonParser().parse(jsonPublication); + logger.info("json publication loaded: " + j.toString()); + Publication oafPublication = (Publication) PublicationToOaf + .generatePublicationActionsFromDump(j.getAsJsonObject()); + assertNotNull(oafPublication.getId()); + assertNotNull(oafPublication.getOriginalId()); + assertEquals(oafPublication.getOriginalId().get(0), "60153327"); + logger.info("oafPublication.getId(): " + oafPublication.getId()); + assertEquals( + oafPublication.getTitle().get(0).getValue(), + "Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp"); + assertNotNull(oafPublication.getLastupdatetimestamp()); + assertNotNull(oafPublication.getDateofcollection()); + assertNotNull(oafPublication.getDateoftransformation()); + assertTrue(oafPublication.getAuthor().size() == 7); + oafPublication.getAuthor().forEach(a -> { + assertNotNull(a.getFullname()); + assertNotNull(a.getRank()); + logger.info("a.getFullname(): " + a.getFullname()); + if (a.getName() != null) { + logger.info("a.getName(): " + a.getName()); + } + if (a.getSurname() != null) { + logger.info("a.getSurname(): " + a.getSurname()); + } + logger.info("a.getRank(): " + a.getRank()); + if (a.getPid() != null) { + logger.info("a.getPid(): " + a.getPid().get(0).getValue()); + } + + }); + assertNotNull(oafPublication.getCollectedfrom()); + if (oafPublication.getSource() != null) { + logger.info((oafPublication.getSource().get(0).getValue())); + } + if (oafPublication.getExternalReference() != null) { + oafPublication.getExternalReference().forEach(e -> { + assertNotNull(e.getRefidentifier()); + assertEquals(e.getQualifier().getSchemeid(), "dnet:pid_types"); + }); + } + assertNotNull(oafPublication.getInstance()); + oafPublication.getInstance().forEach(i -> { + assertNotNull(i.getInstancetype().getClassid()); + logger.info("i.getInstancetype().getClassid(): " + i.getInstancetype().getClassid()); + assertNotNull(i.getInstancetype().getClassname()); + logger.info("i.getInstancetype().getClassname(): " + i.getInstancetype().getClassname()); + }); + } +} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index 6a5faddbd..d426b01f1 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -95,7 +95,8 @@ public class OrcidNoDoiTest { } @Test - public void authorMatchTest() throws Exception { + @Ignore + private void authorMatchTest() throws Exception { logger.info("running authorSimpleMatchTest ...."); String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml"; AuthorData author = new AuthorData(); diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/publication.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/publication.json new file mode 100644 index 000000000..579e12f2e --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/publication.json @@ -0,0 +1 @@ +{"oid":"0000-0002-4147-3387","id":"60153327","sourceName":"The Chinese University of Hong Kong","type":"conference-paper","titles":["Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp"],"extIds":[{"type":"wosuid","value":"000425015800225","relationShip":"self"},{"type":"other-id","value":"441f521e-ab19-448d-ba32-83157b348ada","relationShip":"self"}],"publicationDates":[],"contributors":[{"sequence":"1","oid":"0000-0002-4147-3387","name":"Elaine","surname":"Chow","creditName":"Elaine Chow"},{"sequence":"2","creditName":"Victor Tsui"},{"sequence":"3","creditName":"Achim Müller"},{"sequence":"4","creditName":"Vincy Lee"},{"sequence":"5","creditName":"Lucia Krivánekova"},{"sequence":"6","creditName":"Roland Krivánek"},{"sequence":"7","creditName":"Juliana CN Chan"}]} \ No newline at end of file