From 5f98ea74a95db718fc2199c4e6991f3534428d03 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 17 Mar 2021 15:53:24 +0100 Subject: [PATCH] Added fix for pid generation in stableIds --- .../schema/oaf/utils/IdentifierFactory.java | 27 +++++++++++++++++++ .../doiboost/crossref/Crossref2Oaf.scala | 5 ++++ .../dnetlib/doiboost/mag/MagDataModel.scala | 4 +-- .../dnetlib/doiboost/orcid/ORCIDToOAF.scala | 7 ++++- .../dnetlib/doiboost/uw/UnpayWallToOAF.scala | 15 +++-------- .../doiboost/uw/UnpayWallMappingTest.scala | 6 ++--- 6 files changed, 45 insertions(+), 19 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java index 0371a2879..8b297079c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java @@ -60,6 +60,33 @@ public class IdentifierFactory implements Serializable { return pidFromInstance(pid, collectedFrom).distinct().collect(Collectors.toList()); } + public static String createDOIBoostIdentifier(T entity) { + if (entity == null) + return null; + + StructuredProperty pid = null; + if(entity.getPid() != null ) { + pid = entity.getPid() + .stream() + .filter(Objects::nonNull) + .filter(s -> s.getQualifier()!= null && "doi".equalsIgnoreCase(s.getQualifier().getClassid())) + .filter(IdentifierFactory::pidFilter) + .findAny().orElse(null); + } else { + if (entity.getInstance()!= null) { + pid = entity.getInstance() + .stream() + .filter(i -> i.getPid()!= null) + .flatMap(i -> i.getPid().stream()) + .filter(IdentifierFactory::pidFilter) + .findAny().orElse(null); + } + } + if (pid!= null) + return idFromPid(entity, pid, true); + return null; + } + /** * Creates an identifier from the most relevant PID (if available) provided by a known PID authority in the given * entity T. Returns entity.id when none of the PIDs meet the selection criteria is available. diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 2e30add3a..43b3f7e1c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -206,6 +206,9 @@ case object Crossref2Oaf { val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null).distinct if (links.nonEmpty) instance.setUrl(links.asJava) + result.setId(IdentifierFactory.createDOIBoostIdentifier(result)) + if (result.getId== null) + return null result } @@ -240,6 +243,8 @@ case object Crossref2Oaf { return List() val cOBJCategory = mappingCrossrefSubType.getOrElse(objectType, mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")); mappingResult(result, json, cOBJCategory) + if (result == null) + return List() val funderList: List[mappingFunder] = (json \ "funder").extractOrElse[List[mappingFunder]](List()) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala index c4b28505f..6a8ae9928 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala @@ -197,8 +197,8 @@ case object ConversionUtil { //IMPORTANT //The old method result.setId(generateIdentifier(result, doi)) //will be replaced using IdentifierFactory - pub.setId(generateIdentifier(pub, paper.Doi.toLowerCase)) - pub.setId(IdentifierFactory.createIdentifier(pub)) + + pub.setId(IdentifierFactory.createDOIBoostIdentifier(pub)) val mainTitles = createSP(paper.PaperTitle, "main title", "dnet:dataCite_title") val originalTitles = createSP(paper.OriginalTitle, "alternative title", "dnet:dataCite_title") diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala index ccf005ce1..02016b47c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala @@ -1,6 +1,7 @@ package eu.dnetlib.doiboost.orcid import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication} import eu.dnetlib.dhp.schema.orcid.OrcidDOI import eu.dnetlib.doiboost.DoiBoostMappingUtil @@ -49,7 +50,11 @@ object ORCIDToOAF { val pub:Publication = new Publication pub.setPid(List(createSP(doi.toLowerCase, "doi", PID_TYPES)).asJava) pub.setDataInfo(generateDataInfo()) - pub.setId(generateIdentifier(pub, doi.toLowerCase)) + + pub.setId(IdentifierFactory.createDOIBoostIdentifier(pub)) + if (pub.getId == null) + return null + try{ val l:List[Author]= input.getAuthors.asScala.map(a=> { diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala index 6e4b2400f..b9895dd09 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala @@ -55,7 +55,6 @@ object UnpayWallToOAF { val doi = (json \"doi").extract[String] - val is_oa = (json\ "is_oa").extract[Boolean] val journal_is_oa= (json\ "journal_is_oa").extract[Boolean] @@ -63,10 +62,6 @@ object UnpayWallToOAF { val oaLocation:OALocation = (json \ "best_oa_location").extractOrElse[OALocation](null) val colour = get_color(is_oa, oaLocation, journal_is_oa) - pub.setPid(List(createSP(doi, "doi", PID_TYPES)).asJava) - - - pub.setCollectedfrom(List(createUnpayWallCollectedFrom()).asJava) pub.setDataInfo(generateDataInfo()) @@ -82,12 +77,9 @@ object UnpayWallToOAF { // i.setAccessright(getOpenAccessQualifier()) i.setUrl(List(oaLocation.url.get).asJava) - // Ticket #6281 added pid to Instance - i.setPid(pub.getPid) - if (oaLocation.license.isDefined) i.setLicense(asField(oaLocation.license.get)) - + pub.setPid(List(createSP(doi, "doi", PID_TYPES)).asJava) // Ticket #6282 Adding open Access Colour if (colour.isDefined) { @@ -98,6 +90,7 @@ object UnpayWallToOAF { a.setSchemename(ModelConstants.DNET_ACCESS_MODES) a.setOpenAccessRoute(colour.get) i.setAccessright(a) + i.setPid(List(createSP(doi, "doi", PID_TYPES)).asJava) } pub.setInstance(List(i).asJava) @@ -105,9 +98,7 @@ object UnpayWallToOAF { //The old method pub.setId(IdentifierFactory.createIdentifier(pub)) //will be replaced using IdentifierFactory //pub.setId(generateIdentifier(pub, doi.toLowerCase)) - val id = IdentifierFactory.createIdentifier(pub) - logger.info(id); - pub.setId(IdentifierFactory.createIdentifier(pub)) + pub.setId(IdentifierFactory.createDOIBoostIdentifier(pub)) pub } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala index 94682d142..6688fc616 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala @@ -22,13 +22,11 @@ class UnpayWallMappingTest { for (line <-Ilist.lines) { - - val p = UnpayWallToOAF.convertToOAF(line) if(p!= null) { - assertTrue(p.getPid.size()==1) - logger.info("ID :",p.getId) + assertTrue(p.getInstance().size()==1) + logger.info(s"ID : ${p.getId}") } assertNotNull(line) assertTrue(line.nonEmpty)